18 #define DEBUG_FPRINTF(...)
19 #define DEBUG_EXIT(...)
23 void daxpy (
int n,
float da,
float dx[],
int incx,
float dy[],
int incy )
93 if ( incx != 1 || incy != 1 )
101 ix = ( - n + 1 ) * incx;
110 iy = ( - n + 1 ) * incy;
113 for ( i = 0; i < n; i++ )
115 dy[iy] = dy[iy] + da * dx[ix];
127 for ( i = 0; i < m; i++ )
129 dy[i] = dy[i] + da * dx[i];
132 for ( i = m; i < n; i = i + 4 )
134 dy[i ] = dy[i ] + da * dx[i ];
135 dy[i+1] = dy[i+1] + da * dx[i+1];
136 dy[i+2] = dy[i+2] + da * dx[i+2];
137 dy[i+3] = dy[i+3] + da * dx[i+3];
144 float ddot (
int n,
float dx[],
int incx,
float dy[],
int incy )
212 if ( incx != 1 || incy != 1 )
220 ix = ( - n + 1 ) * incx;
229 iy = ( - n + 1 ) * incy;
232 for ( i = 0; i < n; i++ )
234 dtemp = dtemp + dx[ix] * dy[iy];
246 for ( i = 0; i < m; i++ )
248 dtemp = dtemp + dx[i] * dy[i];
251 for ( i = m; i < n; i = i + 5 )
253 dtemp = dtemp + dx[i ] * dy[i ]
264 float dnrm2 (
int n,
float x[],
int incx )
318 if ( n < 1 || incx < 1 )
324 norm = fabs ( x[0] );
332 for ( i = 0; i < n; i++ )
336 absxi = fabs ( x[ix] );
339 ssq = 1.0 + ssq * (
scale / absxi ) * (
scale / absxi );
344 ssq = ssq + ( absxi /
scale ) * ( absxi /
scale );
350 norm =
scale * sqrt ( ssq );
357 void dqrank (
float a[],
int lda,
int m,
int n,
float tol,
int *kr,
358 int jpvt[],
float qraux[] )
438 for ( i = 0; i < n; i++ )
447 dqrdc ( a, lda, m, n, qraux, jpvt, work, job );
452 for ( j = 0; j < k; j++ )
454 if ( fabs ( a[j+j*lda] ) <= tol * fabs ( a[0+0*lda] ) )
465 void dqrdc (
float a[],
int lda,
int n,
int p,
float qraux[],
int jpvt[],
466 float work[],
int job )
566 for ( j = 1; j <=
p; j++ )
568 swapj = ( 0 < jpvt[j-1] );
583 dswap ( n, a+0+(pl-1)*lda, 1, a+0+(j-1), 1 );
585 jpvt[j-1] = jpvt[pl-1];
592 for ( j =
p; 1 <= j; j-- )
596 jpvt[j-1] = -jpvt[j-1];
600 dswap ( n, a+0+(pu-1)*lda, 1, a+0+(j-1)*lda, 1 );
602 jpvt[pu-1] = jpvt[j-1];
612 for ( j = pl; j <= pu; j++ )
614 qraux[j-1] =
dnrm2 ( n, a+0+(j-1)*lda, 1 );
617 for ( j = pl; j <= pu; j++ )
619 work[j-1] = qraux[j-1];
626 for ( l = 1; l <= lup; l++ )
631 if ( pl <= l && l < pu )
635 for ( j = l; j <= pu; j++ )
637 if ( maxnrm < qraux[j-1] )
646 dswap ( n, a+0+(l-1)*lda, 1, a+0+(maxj-1)*lda, 1 );
647 qraux[maxj-1] = qraux[l-1];
648 work[maxj-1] = work[l-1];
650 jpvt[maxj-1] = jpvt[l-1];
661 nrmxl =
dnrm2 ( n-l+1, a+l-1+(l-1)*lda, 1 );
665 if ( a[l-1+(l-1)*lda] != 0.0 )
667 nrmxl = nrmxl *
r8_sign ( a[l-1+(l-1)*lda] );
670 dscal ( n-l+1, 1.0 / nrmxl, a+l-1+(l-1)*lda, 1 );
671 a[l-1+(l-1)*lda] = 1.0 + a[l-1+(l-1)*lda];
675 for ( j = l + 1; j <=
p; j++ )
677 t = -
ddot ( n-l+1, a+l-1+(l-1)*lda, 1, a+l-1+(j-1)*lda, 1 )
679 daxpy ( n-l+1, t, a+l-1+(l-1)*lda, 1, a+l-1+(j-1)*lda, 1 );
681 if ( pl <= j && j <= pu )
683 if ( qraux[j-1] != 0.0 )
685 tt = 1.0 - pow ( fabs ( a[l-1+(j-1)*lda] ) / qraux[j-1], 2 );
688 tt = 1.0 + 0.05 * tt * pow ( qraux[j-1] / work[j-1], 2 );
692 qraux[j-1] = qraux[j-1] * sqrt ( t );
696 qraux[j-1] =
dnrm2 ( n-l, a+l+(j-1)*lda, 1 );
697 work[j-1] = qraux[j-1];
705 qraux[l-1] = a[l-1+(l-1)*lda];
706 a[l-1+(l-1)*lda] = -nrmxl;
714 int dqrls (
float a[],
int lda,
int m,
int n,
float tol,
int *kr,
float b[],
715 float x[],
float rsd[],
int jpvt[],
float qraux[],
int itask )
857 dqrank ( a, lda, m, n, tol, kr, jpvt, qraux );
862 dqrlss ( a, lda, m, n, *kr,
b, x, rsd, jpvt, qraux );
867 void dqrlss (
float a[],
int lda,
int m,
int n,
int kr,
float b[],
float x[],
868 float rsd[],
int jpvt[],
float qraux[] )
951 info =
dqrsl ( a, lda, m, kr, qraux,
b, rsd, rsd, x, rsd, rsd, job );
954 for ( i = 0; i < n; i++ )
959 for ( i = kr; i < n; i++ )
964 for ( j = 1; j <= n; j++ )
966 if ( jpvt[j-1] <= 0 )
976 jpvt[k-1] = -jpvt[k-1];
985 int dqrsl (
float a[],
int lda,
int n,
int k,
float qraux[],
float y[],
986 float qy[],
float qty[],
float b[],
float rsd[],
float ab[],
int job )
1143 cqy = ( job / 10000 != 0 );
1144 cqty = ( ( job % 10000 ) != 0 );
1145 cb = ( ( job % 1000 ) / 100 != 0 );
1146 cr = ( ( job % 100 ) / 10 != 0 );
1147 cab = ( ( job % 10 ) != 0 );
1172 if ( a[0+0*lda] == 0.0 )
1178 b[0] = y[0] / a[0+0*lda];
1193 for ( i = 1; i <= n; i++ )
1201 for ( i = 1; i <= n; i++ )
1211 for ( jj = 1; jj <= ju; jj++ )
1215 if ( qraux[j-1] != 0.0 )
1217 temp = a[j-1+(j-1)*lda];
1218 a[j-1+(j-1)*lda] = qraux[j-1];
1219 t = -
ddot ( n-j+1, a+j-1+(j-1)*lda, 1, qy+j-1, 1 ) / a[j-1+(j-1)*lda];
1220 daxpy ( n-j+1, t, a+j-1+(j-1)*lda, 1, qy+j-1, 1 );
1221 a[j-1+(j-1)*lda] = temp;
1230 for ( j = 1; j <= ju; j++ )
1232 if ( qraux[j-1] != 0.0 )
1234 temp = a[j-1+(j-1)*lda];
1235 a[j-1+(j-1)*lda] = qraux[j-1];
1236 t = -
ddot ( n-j+1, a+j-1+(j-1)*lda, 1, qty+j-1, 1 ) / a[j-1+(j-1)*lda];
1237 daxpy ( n-j+1, t, a+j-1+(j-1)*lda, 1, qty+j-1, 1 );
1238 a[j-1+(j-1)*lda] = temp;
1247 for ( i = 1; i <= k; i++ )
1255 for ( i = 1; i <= k; i++ )
1263 for ( i = k+1; i <= n; i++ )
1265 rsd[i-1] = qty[i-1];
1269 if ( cab && k+1 <= n )
1271 for ( i = k+1; i <= n; i++ )
1279 for ( i = 1; i <= k; i++ )
1289 for ( jj = 1; jj <= k; jj++ )
1293 if ( a[j-1+(j-1)*lda] == 0.0 )
1299 b[j-1] =
b[j-1] / a[j-1+(j-1)*lda];
1304 daxpy ( j-1, t, a+0+(j-1)*lda, 1,
b, 1 );
1313 for ( jj = 1; jj <= ju; jj++ )
1317 if ( qraux[j-1] != 0.0 )
1319 temp = a[j-1+(j-1)*lda];
1320 a[j-1+(j-1)*lda] = qraux[j-1];
1324 t = -
ddot ( n-j+1, a+j-1+(j-1)*lda, 1, rsd+j-1, 1 )
1326 daxpy ( n-j+1, t, a+j-1+(j-1)*lda, 1, rsd+j-1, 1 );
1331 t = -
ddot ( n-j+1, a+j-1+(j-1)*lda, 1, ab+j-1, 1 )
1333 daxpy ( n-j+1, t, a+j-1+(j-1)*lda, 1, ab+j-1, 1 );
1335 a[j-1+(j-1)*lda] = temp;
1344 void dscal (
int n,
float sa,
float x[],
int incx )
1394 else if ( incx == 1 )
1398 for ( i = 0; i < m; i++ )
1403 for ( i = m; i < n; i = i + 5 )
1406 x[i+1] = sa * x[i+1];
1407 x[i+2] = sa * x[i+2];
1408 x[i+3] = sa * x[i+3];
1409 x[i+4] = sa * x[i+4];
1420 ix = ( - n + 1 ) * incx;
1423 for ( i = 0; i < n; i++ )
1433 void dswap (
int n,
float x[],
int incx,
float y[],
int incy )
1487 else if ( incx == 1 && incy == 1 )
1491 for ( i = 0; i < m; i++ )
1498 for ( i = m; i < n; i = i + 3 )
1521 ix = ( - n + 1 ) * incx;
1530 iy = ( - n + 1 ) * incy;
1533 for ( i = 0; i < n; i++ )
1548 void qr_solve (
int m,
int n,
float a[],
float b[],
float x[] )
1613 ind =
dqrls ( a_qr, lda, m, n, tol, &kr,
b, x, r, jpvt, qraux, itask );