My Project
Loading...
Searching...
No Matches
blas.h
Go to the documentation of this file.
1/*************************************************************************
2Copyright (c) 2005-2007, Sergey Bochkanov (ALGLIB project).
3
4Redistribution and use in source and binary forms, with or without
5modification, are permitted provided that the following conditions are
6met:
7
8- Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11- Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer listed
13 in this license in the documentation and/or other materials
14 provided with the distribution.
15
16- Neither the name of the copyright holders nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*************************************************************************/
32
33#ifndef _blas_h
34#define _blas_h
35
36#include "ap.h"
37#include "amp.h"
38namespace blas
39{
40 template<unsigned int Precision>
42 int i1,
43 int i2);
44 template<unsigned int Precision>
46 int i1,
47 int i2);
48 template<unsigned int Precision>
50 int i1,
51 int i2,
52 int j);
53 template<unsigned int Precision>
55 int j1,
56 int j2,
57 int i);
58 template<unsigned int Precision>
60 int i1,
61 int i2,
62 int j1,
63 int j2,
65 template<unsigned int Precision>
67 int is1,
68 int is2,
69 int js1,
70 int js2,
72 int id1,
73 int id2,
74 int jd1,
75 int jd2);
76 template<unsigned int Precision>
78 int i1,
79 int i2,
80 int j1,
81 int j2,
83 template<unsigned int Precision>
85 int is1,
86 int is2,
87 int js1,
88 int js2,
90 int id1,
91 int id2,
92 int jd1,
93 int jd2);
94 template<unsigned int Precision>
96 int i1,
97 int i2,
98 int j1,
99 int j2,
100 bool trans,
102 int ix1,
103 int ix2,
106 int iy1,
107 int iy2,
109 template<unsigned int Precision>
112 template<unsigned int Precision>
114 int ai1,
115 int ai2,
116 int aj1,
117 int aj2,
118 bool transa,
120 int bi1,
121 int bi2,
122 int bj1,
123 int bj2,
124 bool transb,
127 int ci1,
128 int ci2,
129 int cj1,
130 int cj2,
133
134
135 template<unsigned int Precision>
137 int i1,
138 int i2)
139 {
141 int n;
142 int ix;
146
147
148 n = i2-i1+1;
149 if( n<1 )
150 {
151 result = 0;
152 return result;
153 }
154 if( n==1 )
155 {
156 result = amp::abs<Precision>(x(i1));
157 return result;
158 }
159 scl = 0;
160 ssq = 1;
161 for(ix=i1; ix<=i2; ix++)
162 {
163 if( x(ix)!=0 )
164 {
165 absxi = amp::abs<Precision>(x(ix));
166 if( scl<absxi )
167 {
168 ssq = 1+ssq*amp::sqr<Precision>(scl/absxi);
169 scl = absxi;
170 }
171 else
172 {
173 ssq = ssq+amp::sqr<Precision>(absxi/scl);
174 }
175 }
176 }
177 result = scl*amp::sqrt<Precision>(ssq);
178 return result;
179 }
180
181
182 template<unsigned int Precision>
184 int i1,
185 int i2)
186 {
187 int result;
188 int i;
190
191
192 result = i1;
193 a = amp::abs<Precision>(x(result));
194 for(i=i1+1; i<=i2; i++)
195 {
196 if( amp::abs<Precision>(x(i))>amp::abs<Precision>(x(result)) )
197 {
198 result = i;
199 }
200 }
201 return result;
202 }
203
204
205 template<unsigned int Precision>
207 int i1,
208 int i2,
209 int j)
210 {
211 int result;
212 int i;
214
215
216 result = i1;
217 a = amp::abs<Precision>(x(result,j));
218 for(i=i1+1; i<=i2; i++)
219 {
220 if( amp::abs<Precision>(x(i,j))>amp::abs<Precision>(x(result,j)) )
221 {
222 result = i;
223 }
224 }
225 return result;
226 }
227
228
229 template<unsigned int Precision>
231 int j1,
232 int j2,
233 int i)
234 {
235 int result;
236 int j;
238
239
240 result = j1;
241 a = amp::abs<Precision>(x(i,result));
242 for(j=j1+1; j<=j2; j++)
243 {
244 if( amp::abs<Precision>(x(i,j))>amp::abs<Precision>(x(i,result)) )
245 {
246 result = j;
247 }
248 }
249 return result;
250 }
251
252
253 template<unsigned int Precision>
255 int i1,
256 int i2,
257 int j1,
258 int j2,
260 {
262 int i;
263 int j;
264
265
266 ap::ap_error::make_assertion(i2-i1==j2-j1);
267 for(j=j1; j<=j2; j++)
268 {
269 work(j) = 0;
270 }
271 for(i=i1; i<=i2; i++)
272 {
273 for(j=ap::maxint(j1, j1+i-i1-1); j<=j2; j++)
274 {
275 work(j) = work(j)+amp::abs<Precision>(a(i,j));
276 }
277 }
278 result = 0;
279 for(j=j1; j<=j2; j++)
280 {
281 result = amp::maximum<Precision>(result, work(j));
282 }
283 return result;
284 }
285
286
287 template<unsigned int Precision>
289 int is1,
290 int is2,
291 int js1,
292 int js2,
294 int id1,
295 int id2,
296 int jd1,
297 int jd2)
298 {
299 int isrc;
300 int idst;
301
302
303 if( is1>is2 || js1>js2 )
304 {
305 return;
306 }
307 ap::ap_error::make_assertion(is2-is1==id2-id1);
308 ap::ap_error::make_assertion(js2-js1==jd2-jd1);
309 for(isrc=is1; isrc<=is2; isrc++)
310 {
311 idst = isrc-is1+id1;
312 ap::vmove(b.getrow(idst, jd1, jd2), a.getrow(isrc, js1, js2));
313 }
314 }
315
316
317 template<unsigned int Precision>
319 int i1,
320 int i2,
321 int j1,
322 int j2,
324 {
325 int i;
326 int j;
327 int ips;
328 int jps;
329 int l;
330
331
332 if( i1>i2 || j1>j2 )
333 {
334 return;
335 }
336 ap::ap_error::make_assertion(i1-i2==j1-j2);
337 for(i=i1; i<=i2-1; i++)
338 {
339 j = j1+i-i1;
340 ips = i+1;
341 jps = j1+ips-i1;
342 l = i2-i;
343 ap::vmove(work.getvector(1, l), a.getcolumn(j, ips, i2));
344 ap::vmove(a.getcolumn(j, ips, i2), a.getrow(i, jps, j2));
345 ap::vmove(a.getrow(i, jps, j2), work.getvector(1, l));
346 }
347 }
348
349
350 template<unsigned int Precision>
352 int is1,
353 int is2,
354 int js1,
355 int js2,
357 int id1,
358 int id2,
359 int jd1,
360 int jd2)
361 {
362 int isrc;
363 int jdst;
364
365
366 if( is1>is2 || js1>js2 )
367 {
368 return;
369 }
370 ap::ap_error::make_assertion(is2-is1==jd2-jd1);
371 ap::ap_error::make_assertion(js2-js1==id2-id1);
372 for(isrc=is1; isrc<=is2; isrc++)
373 {
374 jdst = isrc-is1+jd1;
375 ap::vmove(b.getcolumn(jdst, id1, id2), a.getrow(isrc, js1, js2));
376 }
377 }
378
379
380 template<unsigned int Precision>
382 int i1,
383 int i2,
384 int j1,
385 int j2,
386 bool trans,
388 int ix1,
389 int ix2,
392 int iy1,
393 int iy2,
395 {
396 int i;
398
399
400 if( !trans )
401 {
402
403 //
404 // y := alpha*A*x + beta*y;
405 //
406 if( i1>i2 || j1>j2 )
407 {
408 return;
409 }
410 ap::ap_error::make_assertion(j2-j1==ix2-ix1);
411 ap::ap_error::make_assertion(i2-i1==iy2-iy1);
412
413 //
414 // beta*y
415 //
416 if( beta==0 )
417 {
418 for(i=iy1; i<=iy2; i++)
419 {
420 y(i) = 0;
421 }
422 }
423 else
424 {
425 ap::vmul(y.getvector(iy1, iy2), beta);
426 }
427
428 //
429 // alpha*A*x
430 //
431 for(i=i1; i<=i2; i++)
432 {
433 v = ap::vdotproduct(a.getrow(i, j1, j2), x.getvector(ix1, ix2));
434 y(iy1+i-i1) = y(iy1+i-i1)+alpha*v;
435 }
436 }
437 else
438 {
439
440 //
441 // y := alpha*A'*x + beta*y;
442 //
443 if( i1>i2 || j1>j2 )
444 {
445 return;
446 }
447 ap::ap_error::make_assertion(i2-i1==ix2-ix1);
448 ap::ap_error::make_assertion(j2-j1==iy2-iy1);
449
450 //
451 // beta*y
452 //
453 if( beta==0 )
454 {
455 for(i=iy1; i<=iy2; i++)
456 {
457 y(i) = 0;
458 }
459 }
460 else
461 {
462 ap::vmul(y.getvector(iy1, iy2), beta);
463 }
464
465 //
466 // alpha*A'*x
467 //
468 for(i=i1; i<=i2; i++)
469 {
470 v = alpha*x(ix1+i-i1);
471 ap::vadd(y.getvector(iy1, iy2), a.getrow(i, j1, j2), v);
472 }
473 }
474 }
475
476
477 template<unsigned int Precision>
480 {
486
487
488 xabs = amp::abs<Precision>(x);
489 yabs = amp::abs<Precision>(y);
490 w = amp::maximum<Precision>(xabs, yabs);
491 z = amp::minimum<Precision>(xabs, yabs);
492 if( z==0 )
493 {
494 result = w;
495 }
496 else
497 {
498 result = w*amp::sqrt<Precision>(1+amp::sqr<Precision>(z/w));
499 }
500 return result;
501 }
502
503
504 template<unsigned int Precision>
506 int ai1,
507 int ai2,
508 int aj1,
509 int aj2,
510 bool transa,
512 int bi1,
513 int bi2,
514 int bj1,
515 int bj2,
516 bool transb,
519 int ci1,
520 int ci2,
521 int cj1,
522 int cj2,
525 {
526 int arows;
527 int acols;
528 int brows;
529 int bcols;
530 int crows;
531 int ccols;
532 int i;
533 int j;
534 int k;
535 int l;
536 int r;
538
539
540
541 //
542 // Setup
543 //
544 if( !transa )
545 {
546 arows = ai2-ai1+1;
547 acols = aj2-aj1+1;
548 }
549 else
550 {
551 arows = aj2-aj1+1;
552 acols = ai2-ai1+1;
553 }
554 if( !transb )
555 {
556 brows = bi2-bi1+1;
557 bcols = bj2-bj1+1;
558 }
559 else
560 {
561 brows = bj2-bj1+1;
562 bcols = bi2-bi1+1;
563 }
564 ap::ap_error::make_assertion(acols==brows);
565 if( arows<=0 || acols<=0 || brows<=0 || bcols<=0 )
566 {
567 return;
568 }
569 crows = arows;
570 ccols = bcols;
571
572 //
573 // Test WORK
574 //
575 i = ap::maxint(arows, acols);
576 i = ap::maxint(brows, i);
577 i = ap::maxint(i, bcols);
578 work(1) = 0;
579 work(i) = 0;
580
581 //
582 // Prepare C
583 //
584 if( beta==0 )
585 {
586 for(i=ci1; i<=ci2; i++)
587 {
588 for(j=cj1; j<=cj2; j++)
589 {
590 c(i,j) = 0;
591 }
592 }
593 }
594 else
595 {
596 for(i=ci1; i<=ci2; i++)
597 {
598 ap::vmul(c.getrow(i, cj1, cj2), beta);
599 }
600 }
601
602 //
603 // A*B
604 //
605 if( !transa && !transb )
606 {
607 for(l=ai1; l<=ai2; l++)
608 {
609 for(r=bi1; r<=bi2; r++)
610 {
611 v = alpha*a(l,aj1+r-bi1);
612 k = ci1+l-ai1;
613 ap::vadd(c.getrow(k, cj1, cj2), b.getrow(r, bj1, bj2), v);
614 }
615 }
616 return;
617 }
618
619 //
620 // A*B'
621 //
622 if( !transa && transb )
623 {
624 if( arows*acols<brows*bcols )
625 {
626 for(r=bi1; r<=bi2; r++)
627 {
628 for(l=ai1; l<=ai2; l++)
629 {
630 v = ap::vdotproduct(a.getrow(l, aj1, aj2), b.getrow(r, bj1, bj2));
631 c(ci1+l-ai1,cj1+r-bi1) = c(ci1+l-ai1,cj1+r-bi1)+alpha*v;
632 }
633 }
634 return;
635 }
636 else
637 {
638 for(l=ai1; l<=ai2; l++)
639 {
640 for(r=bi1; r<=bi2; r++)
641 {
642 v = ap::vdotproduct(a.getrow(l, aj1, aj2), b.getrow(r, bj1, bj2));
643 c(ci1+l-ai1,cj1+r-bi1) = c(ci1+l-ai1,cj1+r-bi1)+alpha*v;
644 }
645 }
646 return;
647 }
648 }
649
650 //
651 // A'*B
652 //
653 if( transa && !transb )
654 {
655 for(l=aj1; l<=aj2; l++)
656 {
657 for(r=bi1; r<=bi2; r++)
658 {
659 v = alpha*a(ai1+r-bi1,l);
660 k = ci1+l-aj1;
661 ap::vadd(c.getrow(k, cj1, cj2), b.getrow(r, bj1, bj2), v);
662 }
663 }
664 return;
665 }
666
667 //
668 // A'*B'
669 //
670 if( transa && transb )
671 {
672 if( arows*acols<brows*bcols )
673 {
674 for(r=bi1; r<=bi2; r++)
675 {
676 for(i=1; i<=crows; i++)
677 {
678 work(i) = amp::ampf<Precision>("0.0");
679 }
680 for(l=ai1; l<=ai2; l++)
681 {
682 v = alpha*b(r,bj1+l-ai1);
683 k = cj1+r-bi1;
684 ap::vadd(work.getvector(1, crows), a.getrow(l, aj1, aj2), v);
685 }
686 ap::vadd(c.getcolumn(k, ci1, ci2), work.getvector(1, crows));
687 }
688 return;
689 }
690 else
691 {
692 for(l=aj1; l<=aj2; l++)
693 {
694 k = ai2-ai1+1;
695 ap::vmove(work.getvector(1, k), a.getcolumn(l, ai1, ai2));
696 for(r=bi1; r<=bi2; r++)
697 {
698 v = ap::vdotproduct(work.getvector(1, k), b.getrow(r, bj1, bj2));
699 c(ci1+l-aj1,cj1+r-bi1) = c(ci1+l-aj1,cj1+r-bi1)+alpha*v;
700 }
701 }
702 return;
703 }
704 }
705 }
706} // namespace
707
708#endif
int l
Definition: cfEzgcd.cc:100
int i
Definition: cfEzgcd.cc:132
int k
Definition: cfEzgcd.cc:99
Variable x
Definition: cfModGcd.cc:4082
CanonicalForm b
Definition: cfModGcd.cc:4103
Definition: amp.h:82
static void make_assertion(bool bClause)
Definition: ap.h:49
Variable alpha
Definition: facAbsBiFact.cc:51
return result
Definition: facAbsBiFact.cc:75
const CanonicalForm int const CFList const Variable & y
Definition: facAbsFact.cc:53
Variable beta
Definition: facAbsFact.cc:95
const CanonicalForm & w
Definition: facAbsFact.cc:51
const Variable & v
< [in] a sqrfree bivariate poly
Definition: facBivar.h:39
int j
Definition: facHensel.cc:110
int maxint(int m1, int m2)
Definition: ap.cpp:162
T vdotproduct(const_raw_vector< T > v1, const_raw_vector< T > v2)
Definition: ap.h:181
void vmove(raw_vector< T > vdst, const_raw_vector< T > vsrc)
Definition: ap.h:237
void vadd(raw_vector< T > vdst, const_raw_vector< T > vsrc)
Definition: ap.h:413
void vmul(raw_vector< T > vdst, T2 alpha)
Definition: ap.h:603
Definition: blas.h:39
int columnidxabsmax(const ap::template_2d_array< amp::ampf< Precision > > &x, int i1, int i2, int j)
Definition: blas.h:206
void matrixmatrixmultiply(const ap::template_2d_array< amp::ampf< Precision > > &a, int ai1, int ai2, int aj1, int aj2, bool transa, const ap::template_2d_array< amp::ampf< Precision > > &b, int bi1, int bi2, int bj1, int bj2, bool transb, amp::ampf< Precision > alpha, ap::template_2d_array< amp::ampf< Precision > > &c, int ci1, int ci2, int cj1, int cj2, amp::ampf< Precision > beta, ap::template_1d_array< amp::ampf< Precision > > &work)
Definition: blas.h:505
int rowidxabsmax(const ap::template_2d_array< amp::ampf< Precision > > &x, int j1, int j2, int i)
Definition: blas.h:230
void copyandtranspose(const ap::template_2d_array< amp::ampf< Precision > > &a, int is1, int is2, int js1, int js2, ap::template_2d_array< amp::ampf< Precision > > &b, int id1, int id2, int jd1, int jd2)
Definition: blas.h:351
amp::ampf< Precision > vectornorm2(const ap::template_1d_array< amp::ampf< Precision > > &x, int i1, int i2)
Definition: blas.h:136
amp::ampf< Precision > upperhessenberg1norm(const ap::template_2d_array< amp::ampf< Precision > > &a, int i1, int i2, int j1, int j2, ap::template_1d_array< amp::ampf< Precision > > &work)
Definition: blas.h:254
void matrixvectormultiply(const ap::template_2d_array< amp::ampf< Precision > > &a, int i1, int i2, int j1, int j2, bool trans, const ap::template_1d_array< amp::ampf< Precision > > &x, int ix1, int ix2, amp::ampf< Precision > alpha, ap::template_1d_array< amp::ampf< Precision > > &y, int iy1, int iy2, amp::ampf< Precision > beta)
Definition: blas.h:381
amp::ampf< Precision > pythag2(amp::ampf< Precision > x, amp::ampf< Precision > y)
Definition: blas.h:478
void inplacetranspose(ap::template_2d_array< amp::ampf< Precision > > &a, int i1, int i2, int j1, int j2, ap::template_1d_array< amp::ampf< Precision > > &work)
Definition: blas.h:318
void copymatrix(const ap::template_2d_array< amp::ampf< Precision > > &a, int is1, int is2, int js1, int js2, ap::template_2d_array< amp::ampf< Precision > > &b, int id1, int id2, int jd1, int jd2)
Definition: blas.h:288
int vectoridxabsmax(const ap::template_1d_array< amp::ampf< Precision > > &x, int i1, int i2)
Definition: blas.h:183