3 /* Table of constant values */
5 static integer c__6 = 6;
6 static integer c_n1 = -1;
7 static integer c__9 = 9;
8 static integer c__0 = 0;
9 static integer c__1 = 1;
10 static doublereal c_b82 = 0.;
12 /* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs,
13 doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *
14 s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork,
15 integer *iwork, integer *info)
17 /* System generated locals */
18 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
20 /* Builtin functions */
21 double log(doublereal);
25 doublereal eps, anrm, bnrm;
26 integer itau, nlvl, iascl, ibscl;
28 integer minmn, maxmn, itaup, itauq, mnthr, nwork;
29 extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebrd_(
30 integer *, integer *, doublereal *, integer *, doublereal *,
31 doublereal *, doublereal *, doublereal *, doublereal *, integer *,
33 extern doublereal dlamch_(char *), dlange_(char *, integer *,
34 integer *, doublereal *, integer *, doublereal *);
35 extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *,
36 integer *, doublereal *, doublereal *, integer *, integer *),
37 dlalsd_(char *, integer *, integer *, integer *, doublereal *,
38 doublereal *, doublereal *, integer *, doublereal *, integer *,
39 doublereal *, integer *, integer *), dlascl_(char *,
40 integer *, integer *, doublereal *, doublereal *, integer *,
41 integer *, doublereal *, integer *, integer *), dgeqrf_(
42 integer *, integer *, doublereal *, integer *, doublereal *,
43 doublereal *, integer *, integer *), dlacpy_(char *, integer *,
44 integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *,
45 doublereal *, doublereal *, integer *), xerbla_(char *,
47 extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
48 integer *, integer *);
50 extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *,
51 integer *, integer *, doublereal *, integer *, doublereal *,
52 doublereal *, integer *, doublereal *, integer *, integer *);
54 extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *,
55 integer *, doublereal *, integer *, doublereal *, doublereal *,
56 integer *, doublereal *, integer *, integer *);
58 extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *,
59 integer *, doublereal *, integer *, doublereal *, doublereal *,
60 integer *, doublereal *, integer *, integer *);
61 integer minwrk, maxwrk;
67 /* -- LAPACK driver routine (version 3.1) -- */
68 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
71 /* .. Scalar Arguments .. */
73 /* .. Array Arguments .. */
79 /* DGELSD computes the minimum-norm solution to a real linear least */
80 /* squares problem: */
81 /* minimize 2-norm(| b - A*x |) */
82 /* using the singular value decomposition (SVD) of A. A is an M-by-N */
83 /* matrix which may be rank-deficient. */
85 /* Several right hand side vectors b and solution vectors x can be */
86 /* handled in a single call; they are stored as the columns of the */
87 /* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
90 /* The problem is solved in three steps: */
91 /* (1) Reduce the coefficient matrix A to bidiagonal form with */
92 /* Householder transformations, reducing the original problem */
93 /* into a "bidiagonal least squares problem" (BLS) */
94 /* (2) Solve the BLS using a divide and conquer approach. */
95 /* (3) Apply back all the Householder tranformations to solve */
96 /* the original least squares problem. */
98 /* The effective rank of A is determined by treating as zero those */
99 /* singular values which are less than RCOND times the largest singular */
102 /* The divide and conquer algorithm makes very mild assumptions about */
103 /* floating point arithmetic. It will work on machines with a guard */
104 /* digit in add/subtract, or on those binary machines without guard */
105 /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
106 /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */
107 /* without guard digits, but we know of none. */
112 /* M (input) INTEGER */
113 /* The number of rows of A. M >= 0. */
115 /* N (input) INTEGER */
116 /* The number of columns of A. N >= 0. */
118 /* NRHS (input) INTEGER */
119 /* The number of right hand sides, i.e., the number of columns */
120 /* of the matrices B and X. NRHS >= 0. */
122 /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */
123 /* On entry, the M-by-N matrix A. */
124 /* On exit, A has been destroyed. */
126 /* LDA (input) INTEGER */
127 /* The leading dimension of the array A. LDA >= max(1,M). */
129 /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */
130 /* On entry, the M-by-NRHS right hand side matrix B. */
131 /* On exit, B is overwritten by the N-by-NRHS solution */
132 /* matrix X. If m >= n and RANK = n, the residual */
133 /* sum-of-squares for the solution in the i-th column is given */
134 /* by the sum of squares of elements n+1:m in that column. */
136 /* LDB (input) INTEGER */
137 /* The leading dimension of the array B. LDB >= max(1,max(M,N)). */
139 /* S (output) DOUBLE PRECISION array, dimension (min(M,N)) */
140 /* The singular values of A in decreasing order. */
141 /* The condition number of A in the 2-norm = S(1)/S(min(m,n)). */
143 /* RCOND (input) DOUBLE PRECISION */
144 /* RCOND is used to determine the effective rank of A. */
145 /* Singular values S(i) <= RCOND*S(1) are treated as zero. */
146 /* If RCOND < 0, machine precision is used instead. */
148 /* RANK (output) INTEGER */
149 /* The effective rank of A, i.e., the number of singular values */
150 /* which are greater than RCOND*S(1). */
152 /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
153 /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
155 /* LWORK (input) INTEGER */
156 /* The dimension of the array WORK. LWORK must be at least 1. */
157 /* The exact minimum amount of workspace needed depends on M, */
158 /* N and NRHS. As long as LWORK is at least */
159 /* 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, */
160 /* if M is greater than or equal to N or */
161 /* 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, */
162 /* if M is less than N, the code will execute correctly. */
163 /* SMLSIZ is returned by ILAENV and is equal to the maximum */
164 /* size of the subproblems at the bottom of the computation */
165 /* tree (usually about 25), and */
166 /* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) */
167 /* For good performance, LWORK should generally be larger. */
169 /* If LWORK = -1, then a workspace query is assumed; the routine */
170 /* only calculates the optimal size of the WORK array, returns */
171 /* this value as the first entry of the WORK array, and no error */
172 /* message related to LWORK is issued by XERBLA. */
174 /* IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) */
175 /* LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, */
176 /* where MINMN = MIN( M,N ). */
178 /* INFO (output) INTEGER */
179 /* = 0: successful exit */
180 /* < 0: if INFO = -i, the i-th argument had an illegal value. */
181 /* > 0: the algorithm for computing the SVD failed to converge; */
182 /* if INFO = i, i off-diagonal elements of an intermediate */
183 /* bidiagonal form did not converge to zero. */
185 /* Further Details */
186 /* =============== */
188 /* Based on contributions by */
189 /* Ming Gu and Ren-Cang Li, Computer Science Division, University of */
190 /* California at Berkeley, USA */
191 /* Osni Marques, LBNL/NERSC, USA */
193 /* ===================================================================== */
195 /* .. Parameters .. */
197 /* .. Local Scalars .. */
199 /* .. External Subroutines .. */
201 /* .. External Functions .. */
203 /* .. Intrinsic Functions .. */
205 /* .. Executable Statements .. */
207 /* Test the input arguments. */
209 /* Parameter adjustments */
211 a_offset = 1 + a_dim1;
214 b_offset = 1 + b_dim1;
224 mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1);
225 lquery = *lwork == -1;
230 } else if (*nrhs < 0) {
232 } else if (*lda < max(1,*m)) {
234 } else if (*ldb < max(1,maxmn)) {
238 smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0);
240 /* Compute workspace. */
241 /* (Note: Comments in the code beginning "Workspace:" describe the */
242 /* minimal amount of workspace needed at that point in the code, */
243 /* as well as the preferred amount for good performance. */
244 /* NB refers to the optimal block size for the immediately */
245 /* following subroutine, as returned by ILAENV.) */
248 minmn = max(1,minmn);
250 i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) /
257 if (*m >= *n && *m >= mnthr) {
259 /* Path 1a - overdetermined, with many more rows than columns. */
263 i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m,
265 maxwrk = max(i__1,i__2);
267 i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT",
269 maxwrk = max(i__1,i__2);
273 /* Path 1 - overdetermined or exactly determined. */
276 i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD"
277 , " ", &mm, n, &c_n1, &c_n1);
278 maxwrk = max(i__1,i__2);
280 i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR",
281 "QLT", &mm, nrhs, n, &c_n1);
282 maxwrk = max(i__1,i__2);
284 i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR",
285 "PLN", n, nrhs, n, &c_n1);
286 maxwrk = max(i__1,i__2);
287 /* Computing 2nd power */
289 wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * *
292 i__1 = maxwrk, i__2 = *n * 3 + wlalsd;
293 maxwrk = max(i__1,i__2);
295 i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2),
296 i__2 = *n * 3 + wlalsd;
297 minwrk = max(i__1,i__2);
300 /* Computing 2nd power */
302 wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * *
306 /* Path 2a - underdetermined, with many more columns */
309 maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1,
312 i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) *
313 ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1);
314 maxwrk = max(i__1,i__2);
316 i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(&
317 c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1);
318 maxwrk = max(i__1,i__2);
320 i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) *
321 ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1);
322 maxwrk = max(i__1,i__2);
325 i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
326 maxwrk = max(i__1,i__2);
329 i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
330 maxwrk = max(i__1,i__2);
333 i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ",
334 "LT", n, nrhs, m, &c_n1);
335 maxwrk = max(i__1,i__2);
337 i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd;
338 maxwrk = max(i__1,i__2);
341 /* Path 2 - remaining underdetermined cases. */
343 maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "DGEBRD", " ", m,
346 i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "DORMBR"
347 , "QLT", m, nrhs, n, &c_n1);
348 maxwrk = max(i__1,i__2);
350 i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR",
351 "PLN", n, nrhs, m, &c_n1);
352 maxwrk = max(i__1,i__2);
354 i__1 = maxwrk, i__2 = *m * 3 + wlalsd;
355 maxwrk = max(i__1,i__2);
358 i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2),
359 i__2 = *m * 3 + wlalsd;
360 minwrk = max(i__1,i__2);
362 minwrk = min(minwrk,maxwrk);
363 work[1] = (doublereal) maxwrk;
364 if (*lwork < minwrk && ! lquery) {
371 xerbla_("DGELSD", &i__1);
377 /* Quick return if possible. */
379 if (*m == 0 || *n == 0) {
384 /* Get machine parameters. */
387 sfmin = dlamch_("S");
388 smlnum = sfmin / eps;
389 bignum = 1. / smlnum;
390 dlabad_(&smlnum, &bignum);
392 /* Scale A if max entry outside range [SMLNUM,BIGNUM]. */
394 anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]);
396 if (anrm > 0. && anrm < smlnum) {
398 /* Scale matrix norm up to SMLNUM. */
400 dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda,
403 } else if (anrm > bignum) {
405 /* Scale matrix norm down to BIGNUM. */
407 dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda,
410 } else if (anrm == 0.) {
412 /* Matrix all zero. Return zero solution. */
415 dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb);
416 dlaset_("F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1);
421 /* Scale B if max entry outside range [SMLNUM,BIGNUM]. */
423 bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
425 if (bnrm > 0. && bnrm < smlnum) {
427 /* Scale matrix norm up to SMLNUM. */
429 dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
432 } else if (bnrm > bignum) {
434 /* Scale matrix norm down to BIGNUM. */
436 dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
441 /* If M < N make sure certain entries of B are zero. */
445 dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb);
448 /* Overdetermined case. */
452 /* Path 1 - overdetermined or exactly determined. */
457 /* Path 1a - overdetermined, with many more rows than columns. */
464 /* (Workspace: need 2*N, prefer N+N*NB) */
466 i__1 = *lwork - nwork + 1;
467 dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
470 /* Multiply B by transpose(Q). */
471 /* (Workspace: need N+NRHS, prefer N+NRHS*NB) */
473 i__1 = *lwork - nwork + 1;
474 dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
475 b_offset], ldb, &work[nwork], &i__1, info);
477 /* Zero out below R. */
482 dlaset_("L", &i__1, &i__2, &c_b82, &c_b82, &a[a_dim1 + 2],
492 /* Bidiagonalize R in A. */
493 /* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */
495 i__1 = *lwork - nwork + 1;
496 dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
497 work[itaup], &work[nwork], &i__1, info);
499 /* Multiply B by transpose of left bidiagonalizing vectors of R. */
500 /* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */
502 i__1 = *lwork - nwork + 1;
503 dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq],
504 &b[b_offset], ldb, &work[nwork], &i__1, info);
506 /* Solve the bidiagonal least squares problem. */
508 dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb,
509 rcond, rank, &work[nwork], &iwork[1], info);
514 /* Multiply B by right bidiagonalizing vectors of R. */
516 i__1 = *lwork - nwork + 1;
517 dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &
518 b[b_offset], ldb, &work[nwork], &i__1, info);
520 } else /* if(complicated condition) */ {
522 i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max(
523 i__1,*nrhs), i__2 = *n - *m * 3, i__1 = max(i__1,i__2);
524 if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,wlalsd)) {
526 /* Path 2a - underdetermined, with many more columns than rows */
527 /* and sufficient workspace for an efficient algorithm. */
532 i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 =
533 max(i__3,*nrhs), i__4 = *n - *m * 3;
534 i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda +
535 *m + *m * *nrhs, i__1 = max(i__1,i__2), i__2 = (*m << 2)
536 + *m * *lda + wlalsd;
537 if (*lwork >= max(i__1,i__2)) {
544 /* (Workspace: need 2*M, prefer M+M*NB) */
546 i__1 = *lwork - nwork + 1;
547 dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
551 /* Copy L to WORK(IL), zeroing out above its diagonal. */
553 dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
556 dlaset_("U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], &
558 ie = il + ldwork * *m;
563 /* Bidiagonalize L in WORK(IL). */
564 /* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */
566 i__1 = *lwork - nwork + 1;
567 dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq],
568 &work[itaup], &work[nwork], &i__1, info);
570 /* Multiply B by transpose of left bidiagonalizing vectors of L. */
571 /* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */
573 i__1 = *lwork - nwork + 1;
574 dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[
575 itauq], &b[b_offset], ldb, &work[nwork], &i__1, info);
577 /* Solve the bidiagonal least squares problem. */
579 dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset],
580 ldb, rcond, rank, &work[nwork], &iwork[1], info);
585 /* Multiply B by right bidiagonalizing vectors of L. */
587 i__1 = *lwork - nwork + 1;
588 dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[
589 itaup], &b[b_offset], ldb, &work[nwork], &i__1, info);
591 /* Zero out below first M rows of B. */
594 dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1],
598 /* Multiply transpose(Q) by B. */
599 /* (Workspace: need M+NRHS, prefer M+NRHS*NB) */
601 i__1 = *lwork - nwork + 1;
602 dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
603 b_offset], ldb, &work[nwork], &i__1, info);
607 /* Path 2 - remaining underdetermined cases. */
614 /* Bidiagonalize A. */
615 /* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
617 i__1 = *lwork - nwork + 1;
618 dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
619 work[itaup], &work[nwork], &i__1, info);
621 /* Multiply B by transpose of left bidiagonalizing vectors. */
622 /* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */
624 i__1 = *lwork - nwork + 1;
625 dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq]
626 , &b[b_offset], ldb, &work[nwork], &i__1, info);
628 /* Solve the bidiagonal least squares problem. */
630 dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset],
631 ldb, rcond, rank, &work[nwork], &iwork[1], info);
636 /* Multiply B by right bidiagonalizing vectors of A. */
638 i__1 = *lwork - nwork + 1;
639 dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup]
640 , &b[b_offset], ldb, &work[nwork], &i__1, info);
648 dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
650 dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
652 } else if (iascl == 2) {
653 dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
655 dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
659 dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
661 } else if (ibscl == 2) {
662 dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
667 work[1] = (doublereal) maxwrk;