added benchmark for gemm3m functions
This commit is contained in:
		
							parent
							
								
									9d7057366d
								
							
						
					
					
						commit
						dab4edd069
					
				|  | @ -112,6 +112,11 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ | |||
|        spotrf.mkl dpotrf.mkl cpotrf.mkl zpotrf.mkl \
 | ||||
|        ssymm.mkl dsymm.mkl csymm.mkl zsymm.mkl | ||||
| 
 | ||||
| 
 | ||||
| goto_3m :: cgemm3m.goto zgemm3m.goto | ||||
| 
 | ||||
| mkl_3m  :: cgemm3m.mkl zgemm3m.mkl | ||||
| 
 | ||||
| all :: goto mkl atlas acml | ||||
| 
 | ||||
| ##################################### Slinpack ####################################################
 | ||||
|  | @ -1043,6 +1048,22 @@ zaxpy.mkl : zaxpy.$(SUFFIX) | |||
| 	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
| 
 | ||||
| 
 | ||||
| ##################################### Cgemm3m ####################################################
 | ||||
| 
 | ||||
| cgemm3m.goto : cgemm3m.$(SUFFIX) ../$(LIBNAME) | ||||
| 	$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | ||||
| 
 | ||||
| cgemm3m.mkl : cgemm3m.$(SUFFIX) | ||||
| 	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
| 
 | ||||
| ##################################### Zgemm3m ####################################################
 | ||||
| 
 | ||||
| zgemm3m.goto : zgemm3m.$(SUFFIX) ../$(LIBNAME) | ||||
| 	$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | ||||
| 
 | ||||
| zgemm3m.mkl : zgemm3m.$(SUFFIX) | ||||
| 	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
| 
 | ||||
| 
 | ||||
| ###################################################################################################
 | ||||
| 
 | ||||
|  | @ -1250,6 +1271,11 @@ caxpy.$(SUFFIX) : axpy.c | |||
| zaxpy.$(SUFFIX) : axpy.c | ||||
| 	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | ||||
| 
 | ||||
| cgemm3m.$(SUFFIX) : gemm3m.c | ||||
| 	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^ | ||||
| 
 | ||||
| zgemm3m.$(SUFFIX) : gemm3m.c | ||||
| 	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | ||||
| 
 | ||||
| 
 | ||||
| clean :: | ||||
|  |  | |||
|  | @ -0,0 +1,212 @@ | |||
| /***************************************************************************
 | ||||
| Copyright (c) 2014, The OpenBLAS Project | ||||
| All rights reserved. | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| modification, are permitted provided that the following conditions are | ||||
| met: | ||||
| 1. Redistributions of source code must retain the above copyright | ||||
| notice, this list of conditions and the following disclaimer. | ||||
| 2. Redistributions in binary form must reproduce the above copyright | ||||
| notice, this list of conditions and the following disclaimer in | ||||
| the documentation and/or other materials provided with the | ||||
| distribution. | ||||
| 3. Neither the name of the OpenBLAS project nor the names of | ||||
| its contributors may be used to endorse or promote products | ||||
| derived from this software without specific prior written permission. | ||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | ||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | ||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | ||||
| 
 | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #ifdef __CYGWIN32__ | ||||
| #include <sys/time.h> | ||||
| #endif | ||||
| #include "common.h" | ||||
| 
 | ||||
| 
 | ||||
| #undef GEMM | ||||
| 
 | ||||
| #ifndef COMPLEX | ||||
| 
 | ||||
| #ifdef DOUBLE | ||||
| #define GEMM   BLASFUNC(dgemm) | ||||
| #else | ||||
| #define GEMM   BLASFUNC(sgemm) | ||||
| #endif | ||||
| 
 | ||||
| #else | ||||
| 
 | ||||
| #ifdef DOUBLE | ||||
| #define GEMM   BLASFUNC(zgemm3m) | ||||
| #else | ||||
| #define GEMM   BLASFUNC(cgemm3m) | ||||
| #endif | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | ||||
| 
 | ||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | ||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | ||||
| #endif | ||||
| 
 | ||||
| int gettimeofday(struct timeval *tv, void *tz){ | ||||
| 
 | ||||
|   FILETIME ft; | ||||
|   unsigned __int64 tmpres = 0; | ||||
|   static int tzflag; | ||||
| 
 | ||||
|   if (NULL != tv) | ||||
|     { | ||||
|       GetSystemTimeAsFileTime(&ft); | ||||
| 
 | ||||
|       tmpres |= ft.dwHighDateTime; | ||||
|       tmpres <<= 32; | ||||
|       tmpres |= ft.dwLowDateTime; | ||||
| 
 | ||||
|       /*converting file time to unix epoch*/ | ||||
|       tmpres /= 10;  /*convert into microseconds*/ | ||||
|       tmpres -= DELTA_EPOCH_IN_MICROSECS; | ||||
|       tv->tv_sec = (long)(tmpres / 1000000UL); | ||||
|       tv->tv_usec = (long)(tmpres % 1000000UL); | ||||
|     } | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | ||||
| 
 | ||||
| static void *huge_malloc(BLASLONG size){ | ||||
|   int shmid; | ||||
|   void *address; | ||||
| 
 | ||||
| #ifndef SHM_HUGETLB | ||||
| #define SHM_HUGETLB 04000 | ||||
| #endif | ||||
| 
 | ||||
|   if ((shmid =shmget(IPC_PRIVATE, | ||||
| 		     (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | ||||
| 		     SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | ||||
|     printf( "Memory allocation failed(shmget).\n"); | ||||
|     exit(1); | ||||
|   } | ||||
| 
 | ||||
|   address = shmat(shmid, NULL, SHM_RND); | ||||
| 
 | ||||
|   if ((BLASLONG)address == -1){ | ||||
|     printf( "Memory allocation failed(shmat).\n"); | ||||
|     exit(1); | ||||
|   } | ||||
| 
 | ||||
|   shmctl(shmid, IPC_RMID, 0); | ||||
| 
 | ||||
|   return address; | ||||
| } | ||||
| 
 | ||||
| #define malloc huge_malloc | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| int MAIN__(int argc, char *argv[]){ | ||||
| 
 | ||||
|   FLOAT *a, *b, *c; | ||||
|   FLOAT alpha[] = {1.0, 1.0}; | ||||
|   FLOAT beta [] = {1.0, 1.0}; | ||||
|   char trans='N'; | ||||
|   blasint m, i, j; | ||||
|   int loops = 1; | ||||
|   int l; | ||||
|   char *p; | ||||
| 
 | ||||
|   int from =   1; | ||||
|   int to   = 200; | ||||
|   int step =   1; | ||||
| 
 | ||||
|   struct timeval start, stop; | ||||
|   double time1,timeg; | ||||
| 
 | ||||
|   argc--;argv++; | ||||
| 
 | ||||
|   if (argc > 0) { from     = atol(*argv);		argc--; argv++;} | ||||
|   if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;} | ||||
|   if (argc > 0) { step     = atol(*argv);		argc--; argv++;} | ||||
| 
 | ||||
|   if ((p = getenv("OPENBLAS_TRANS")))  trans=*p; | ||||
| 
 | ||||
|   fprintf(stderr, "From : %3d  To : %3d Step=%d : Trans=%c\n", from, to, step, trans); | ||||
| 
 | ||||
|   if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | ||||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | ||||
|   } | ||||
| 
 | ||||
|   if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | ||||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | ||||
|   } | ||||
| 
 | ||||
|   if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | ||||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | ||||
|   } | ||||
| 
 | ||||
|   p = getenv("OPENBLAS_LOOPS"); | ||||
|   if ( p != NULL ) | ||||
| 	loops = atoi(p); | ||||
| 
 | ||||
| 
 | ||||
| #ifdef linux | ||||
|   srandom(getpid()); | ||||
| #endif | ||||
| 
 | ||||
|   fprintf(stderr, "   SIZE       Flops\n"); | ||||
| 
 | ||||
|   for(m = from; m <= to; m += step) | ||||
|   { | ||||
| 
 | ||||
|     timeg=0; | ||||
| 
 | ||||
|     fprintf(stderr, " %6d : ", (int)m); | ||||
| 
 | ||||
|     for (l=0; l<loops; l++) | ||||
|     { | ||||
|    | ||||
|     	for(j = 0; j < m; j++){ | ||||
|       		for(i = 0; i < m * COMPSIZE; i++){ | ||||
| 			a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| 			b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| 			c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
|       		} | ||||
|     	} | ||||
| 
 | ||||
|     	gettimeofday( &start, (struct timezone *)0); | ||||
| 
 | ||||
|     	GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | ||||
| 
 | ||||
|     	gettimeofday( &stop, (struct timezone *)0); | ||||
| 
 | ||||
|     	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | ||||
| 
 | ||||
| 	timeg += time1; | ||||
| 
 | ||||
|     } | ||||
| 
 | ||||
|     timeg /= loops; | ||||
|     fprintf(stderr, | ||||
| 	    " %10.2f MFlops\n", | ||||
| 	    COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6); | ||||
| 
 | ||||
|   } | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
| 
 | ||||
| void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | ||||
		Loading…
	
		Reference in New Issue