From fc0e0391f3f8430aa6b70509c34f078eb1438a2c Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Fri, 24 Apr 2015 14:30:44 +0200 Subject: [PATCH 1/2] bugfixes: replaced int with BLASLONG --- kernel/x86_64/caxpy.c | 2 +- kernel/x86_64/cdot.c | 6 ++++-- kernel/x86_64/daxpy.c | 4 ++-- kernel/x86_64/ddot.c | 2 +- kernel/x86_64/zaxpy.c | 2 +- kernel/x86_64/zdot.c | 5 +++-- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/kernel/x86_64/caxpy.c b/kernel/x86_64/caxpy.c index 455d9d2ce..1ee0499a7 100644 --- a/kernel/x86_64/caxpy.c +++ b/kernel/x86_64/caxpy.c @@ -84,7 +84,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, if ( (inc_x == 1) && (inc_y == 1) ) { - int n1 = n & -32; + BLASLONG n1 = n & -32; if ( n1 ) { diff --git a/kernel/x86_64/cdot.c b/kernel/x86_64/cdot.c index 266ab4fb9..2b2c4ff7a 100644 --- a/kernel/x86_64/cdot.c +++ b/kernel/x86_64/cdot.c @@ -109,7 +109,7 @@ FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG in if ( (inc_x == 1) && (inc_y == 1) ) { - int n1 = n & -16; + BLASLONG n1 = n & -16; if ( n1 ) { @@ -119,8 +119,10 @@ FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG in dot[4] += dot[6]; dot[5] += dot[7]; } + i = n1; - int j = i * 2; + BLASLONG j = i * 2; + while( i < n ) { diff --git a/kernel/x86_64/daxpy.c b/kernel/x86_64/daxpy.c index 10cc573db..9207e209f 100644 --- a/kernel/x86_64/daxpy.c +++ b/kernel/x86_64/daxpy.c @@ -80,9 +80,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS { #if defined(SANDYBRIDGE) - int n1 = n & -32; + BLASLONG n1 = n & -32; #else - int n1 = n & -16; + BLASLONG n1 = n & -16; #endif if ( n1 ) diff --git a/kernel/x86_64/ddot.c b/kernel/x86_64/ddot.c index 0f77d5fbc..4bf8082c9 100644 --- a/kernel/x86_64/ddot.c +++ b/kernel/x86_64/ddot.c @@ -83,7 +83,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) if ( (inc_x == 1) && (inc_y == 1) ) { - int n1 = n & -16; + BLASLONG n1 = n & -16; if ( n1 ) ddot_kernel_8(n1, x, y , &dot ); diff --git a/kernel/x86_64/zaxpy.c b/kernel/x86_64/zaxpy.c index 1aa95d2b9..560acc7f9 100644 --- a/kernel/x86_64/zaxpy.c +++ b/kernel/x86_64/zaxpy.c @@ -84,7 +84,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, if ( (inc_x == 1) && (inc_y == 1) ) { - int n1 = n & -16; + BLASLONG n1 = n & -16; if ( n1 ) { diff --git a/kernel/x86_64/zdot.c b/kernel/x86_64/zdot.c index c0cca521b..eee00fd9f 100644 --- a/kernel/x86_64/zdot.c +++ b/kernel/x86_64/zdot.c @@ -105,13 +105,14 @@ FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG in if ( (inc_x == 1) && (inc_y == 1) ) { - int n1 = n & -8; + BLASLONG n1 = n & -8; if ( n1 ) zdot_kernel_8(n1, x, y , dot ); i = n1; - int j = i * 2; + BLASLONG j = i * 2; + while( i < n ) { From e77db2af31c54e87ff80009b2fba1560643f6213 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Sat, 25 Apr 2015 14:53:07 +0200 Subject: [PATCH 2/2] add benchmarks for zgeru and cgeru --- benchmark/Makefile | 41 +++++++++++++++++++++++++++++++++++++---- benchmark/ger.c | 9 ++++++++- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/benchmark/Makefile b/benchmark/Makefile index 8a322c4f2..1d2e9ff66 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -39,7 +39,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \ ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \ ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \ - sger.goto dger.goto \ + sger.goto dger.goto cger.goto zger.goto \ sdot.goto ddot.goto cdot.goto zdot.goto \ saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \ sscal.goto dscal.goto cscal.goto zscal.goto \ @@ -62,7 +62,7 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ strsm.acml dtrsm.acml ctrsm.acml ztrsm.acml \ ssyrk.acml dsyrk.acml csyrk.acml zsyrk.acml \ ssyr2k.acml dsyr2k.acml csyr2k.acml zsyr2k.acml \ - sger.acml dger.acml \ + sger.acml dger.acml cger.acml zger.acml \ sdot.acml ddot.acml cdot.acml zdot.acml \ saxpy.acml daxpy.acml caxpy.acml zaxpy.acml \ sscal.acml dscal.acml cscal.acml zscal.acml \ @@ -85,7 +85,7 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \ strsm.atlas dtrsm.atlas ctrsm.atlas ztrsm.atlas \ ssyrk.atlas dsyrk.atlas csyrk.atlas zsyrk.atlas \ ssyr2k.atlas dsyr2k.atlas csyr2k.atlas zsyr2k.atlas \ - sger.atlas dger.atlas \ + sger.atlas dger.atlas cger.atlas zger.atlas\ sdot.atlas ddot.atlas \ saxpy.atlas daxpy.atlas caxpy.atlas zaxpy.atlas \ sscal.atlas dscal.atlas cscal.atlas zscal.atlas \ @@ -109,7 +109,7 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ strsm.mkl dtrsm.mkl ctrsm.mkl ztrsm.mkl \ ssyrk.mkl dsyrk.mkl csyrk.mkl zsyrk.mkl \ ssyr2k.mkl dsyr2k.mkl csyr2k.mkl zsyr2k.mkl \ - sger.mkl dger.mkl \ + sger.mkl dger.mkl cger.mkl zger.mkl \ sdot.mkl ddot.mkl cdot.mkl zdot.mkl \ saxpy.mkl daxpy.mkl caxpy.mkl zaxpy.mkl \ sscal.mkl dscal.mkl cscal.mkl zscal.mkl \ @@ -736,6 +736,32 @@ dger.atlas : dger.$(SUFFIX) dger.mkl : dger.$(SUFFIX) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) +##################################### Cger #################################################### +cger.goto : cger.$(SUFFIX) ../$(LIBNAME) + $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm + +cger.acml : cger.$(SUFFIX) + -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) + +cger.atlas : cger.$(SUFFIX) + -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) + +cger.mkl : cger.$(SUFFIX) + -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) + +##################################### Zger #################################################### +zger.goto : zger.$(SUFFIX) ../$(LIBNAME) + $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm + +zger.acml : zger.$(SUFFIX) + -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) + +zger.atlas : zger.$(SUFFIX) + -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) + +zger.mkl : zger.$(SUFFIX) + -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) + ##################################### Ssymv #################################################### ssymv.goto : ssymv.$(SUFFIX) ../$(LIBNAME) $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm @@ -1348,6 +1374,13 @@ sger.$(SUFFIX) : ger.c dger.$(SUFFIX) : ger.c $(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^ +cger.$(SUFFIX) : ger.c + $(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^ + +zger.$(SUFFIX) : ger.c + $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ + + ssymv.$(SUFFIX) : symv.c $(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^ diff --git a/benchmark/ger.c b/benchmark/ger.c index 354281006..a752a3c3e 100644 --- a/benchmark/ger.c +++ b/benchmark/ger.c @@ -35,12 +35,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #undef GER - +#ifdef COMPLEX +#ifdef DOUBLE +#define GER BLASFUNC(zgeru) +#else +#define GER BLASFUNC(cgeru) +#endif +#else #ifdef DOUBLE #define GER BLASFUNC(dger) #else #define GER BLASFUNC(sger) #endif +#endif #if defined(__WIN32__) || defined(__WIN64__)