0d0c6f7d7d 
								
							 
						 
						
							
							
								
								optimized dgemm for POWER8  
							
							
							
						 
						
							2016-04-27 14:01:08 +02:00  
				
					
						
							
							
								 
						
							
								40ac64ae4f 
								
							 
						 
						
							
							
								
								updated param.h for EXCAVATOR  
							
							
							
						 
						
							2016-04-25 10:40:04 +02:00  
				
					
						
							
							
								 
						
							
								089aad57f7 
								
							 
						 
						
							
							
								
								updated param.h for POWER8  
							
							
							
						 
						
							2016-04-23 14:26:24 +02:00  
				
					
						
							
							
								 
						
							
								879a51165f 
								
							 
						 
						
							
							
								
								Optimized zgemm and tested zgemm again  
							
							
							
						 
						
							2016-04-22 13:07:12 +02:00  
				
					
						
							
							
								 
						
							
								2c3dfe2bf3 
								
							 
						 
						
							
							
								
								MIPS P5600(32 bit) and I6400(64 bit) cores support added.  
							
							... 
							
							
							
							Separated mips and mips64 files.
Configurations support for mips 32 bit.
Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> 
							
						 
						
							2016-04-22 14:03:18 +05:30  
				
					
						
							
							
								 
						
							
								3c6294ca3d 
								
							 
						 
						
							
							
								
								added optimized sgemm_tcopy for power8  
							
							
							
						 
						
							2016-04-19 16:08:54 +02:00  
				
					
						
							
							
								 
						
							
								dd43661cfd 
								
							 
						 
						
							
							
								
								Init IBM z system (s390x) porting.  
							
							
							
						 
						
							2016-04-15 18:02:24 -04:00  
				
					
						
							
							
								 
						
							
								e173c51c04 
								
							 
						 
						
							
							
								
								updated zgemm- and ztrmm-kernel for POWER8  
							
							
							
						 
						
							2016-04-08 09:05:37 +02:00  
				
					
						
							
							
								 
						
							
								9c42f0374a 
								
							 
						 
						
							
							
								
								Updated cgemm- and sgemm-kernel for POWER8 SMP  
							
							
							
						 
						
							2016-04-07 15:08:15 +02:00  
				
					
						
							
							
								 
						
							
								a51102e9b7 
								
							 
						 
						
							
							
								
								bugfixes for sgemm- and cgemm-kernel  
							
							
							
						 
						
							2016-04-06 11:15:21 +02:00  
				
					
						
							
							
								 
						
							
								c5b1fbcb2e 
								
							 
						 
						
							
							
								
								updated optimized cgemm- and ctrmm-kernel for POWER8  
							
							
							
						 
						
							2016-04-04 09:12:08 +02:00  
				
					
						
							
							
								 
						
							
								6a9bbfc227 
								
							 
						 
						
							
							
								
								updated sgemm- and strmm-kernel for POWER8  
							
							
							
						 
						
							2016-04-02 17:16:36 +02:00  
				
					
						
							
							
								 
						
							
								e1df5a6e23 
								
							 
						 
						
							
							
								
								fixed sgemm- and strmm-kernel  
							
							
							
						 
						
							2016-03-18 12:12:03 +01:00  
				
					
						
							
							
								 
						
							
								5c658f8746 
								
							 
						 
						
							
							
								
								add optimized cgemm- and ctrmm-kernel for POWER8  
							
							
							
						 
						
							2016-03-18 08:17:25 +01:00  
				
					
						
							
							
								 
						
							
								96284ab295 
								
							 
						 
						
							
							
								
								added sgemm- and strmm-kernel for POWER8  
							
							
							
						 
						
							2016-03-14 13:52:44 +01:00  
				
					
						
							
							
								 
						
							
								91e1c5080c 
								
							 
						 
						
							
							
								
								modified configuration, to use power6 sgemm kernel for power8  
							
							
							
						 
						
							2016-03-04 13:38:57 +01:00  
				
					
						
							
							
								 
						
							
								b752858d6c 
								
							 
						 
						
							
							
								
								added dgemm-, dtrmm-, zgemm- and ztrmm-kernel for power8  
							
							
							
						 
						
							2016-03-01 07:33:56 +01:00  
				
					
						
							
							
								 
						
							
								3e8d6ea74f 
								
							 
						 
						
							
							
								
								Init POWER8 kernels by POWER6.  
							
							
							
						 
						
							2015-11-03 12:34:23 +08:00  
				
					
						
							
							
								 
						
							
								b07d733a71 
								
							 
						 
						
							
							
								
								added updates for syrk and syr2k  
							
							
							
						 
						
							2016-01-21 13:16:44 +01:00  
				
					
						
							
							
								 
						
							
								39937d15cd 
								
							 
						 
						
							
							
								
								Change BUFFER_SIZE for Cortex A57 to 20 MB  
							
							... 
							
							
							
							Change the GEMM_P, GEMM_Q, GEMM_R values for Cortex A57 
							
						 
						
							2015-11-20 01:12:04 +05:30  
				
					
						
							
							
								 
						
							
								1397b47197 
								
							 
						 
						
							
							
								
								Optimized zgemm kernel for CORTEXA57  
							
							
							
						 
						
							2015-11-09 14:15:53 +05:30  
				
					
						
							
							
								 
						
							
								45f78963ac 
								
							 
						 
						
							
							
								
								Optimized cgemm kernel for CORTEXA57  
							
							... 
							
							
							
							Also, add a generic ztrmm 4x4 kernel 
							
						 
						
							2015-11-09 14:15:53 +05:30  
				
					
						
							
							
								 
						
							
								402443bf9c 
								
							 
						 
						
							
							
								
								Optimized dgemm kernel for CORTEXA57  
							
							
							
						 
						
							2015-11-09 14:15:53 +05:30  
				
					
						
							
							
								 
						
							
								f2f8a0fe8b 
								
							 
						 
						
							
							
								
								Adding arm64 target CORTEXA57  
							
							... 
							
							
							
							Co-Authored-By: Ralph Campbell <ralph.campbell@broadcom.com> 
							
						 
						
							2015-11-09 14:15:50 +05:30  
				
					
						
							
							
								 
						
							
								9bd962f655 
								
							 
						 
						
							
							
								
								modified haswell parameter dgemm_unroll_n  
							
							
							
						 
						
							2015-06-13 10:28:27 +02:00  
				
					
						
							
							
								 
						
							
								51ff17d46e 
								
							 
						 
						
							
							
								
								Add AMD Excavator target.  
							
							
							
						 
						
							2015-05-13 16:16:30 -05:00  
				
					
						
							
							
								 
						
							
								229ce2ccd1 
								
							 
						 
						
							
							
								
								Add cortex-a9 and cortex-a15 targets.  
							
							
							
						 
						
							2015-01-12 08:55:29 +00:00  
				
					
						
							
							
								 
						
							
								ddf983d643 
								
							 
						 
						
							
							
								
								added optimizations for steamroller  
							
							
							
						 
						
							2014-12-30 20:14:45 +08:00  
				
					
						
							
							
								 
						
							
								4319769b79 
								
							 
						 
						
							
							
								
								added target processor STEAMROLLER  
							
							
							
						 
						
							2014-12-28 20:16:46 +08:00  
				
					
						
							
							
								 
						
							
								587e16fba3 
								
							 
						 
						
							
							
								
								Ref  #458 : Backport, sandybridge uses nehalem zgemm kernel  
							
							
							
						 
						
							2014-12-22 17:01:18 +01:00  
				
					
						
							
							
								 
						
							
								2fb02626da 
								
							 
						 
						
							
							
								
								Update organization info.  
							
							
							
						 
						
							2014-11-25 15:28:58 +08:00  
				
					
						
							
							
								 
						
							
								a85c2785ae 
								
							 
						 
						
							
							
								
								Refs  #467 . Added generic kernel file for x86_64.  
							
							
							
						 
						
							2014-11-24 15:34:48 +08:00  
				
					
						
							
							
								 
						
							
								58c90d5937 
								
							 
						 
						
							
							
								
								# The first commit's message is:  
							
							... 
							
							
							
							Optimizations for APM's xgene-1 (aarch64).
1) general system updates to support armv8 better.  Make all did not work, one needed to supply TARGET=ARMV8.
2) sgemm 4x4 kernel in assembler using SIMD, and configuration changes to use it.
3) strmm 4x4 kernel in C.  Since the sgemm kernel does 4x4, the trmm kernel must also do 4xN.
Added Dave Nuechterlein to the contributors list. 
							
						 
						
							2014-11-11 22:19:23 +08:00  
				
					
						
							
							
								 
						
							
								9d7057366d 
								
							 
						 
						
							
							
								
								bugfix for GEMM3M functions  
							
							
							
						 
						
							2014-09-21 11:41:43 +02:00  
				
					
						
							
							
								 
						
							
								7aae4a62e7 
								
							 
						 
						
							
							
								
								enabled use of GEMM3M functions  
							
							
							
						 
						
							2014-09-20 14:27:10 +02:00  
				
					
						
							
							
								 
						
							
								5087096711 
								
							 
						 
						
							
							
								
								optimization of sandybridge cgemm-kernel  
							
							
							
						 
						
							2014-07-29 19:07:21 +02:00  
				
					
						
							
							
								 
						
							
								1cc02b4337 
								
							 
						 
						
							
							
								
								optimized sgemm kernel for haswell  
							
							
							
						 
						
							2014-07-28 11:50:01 +02:00  
				
					
						
							
							
								 
						
							
								125610d23b 
								
							 
						 
						
							
							
								
								allow to set custom value for ?GEMM_DEFAULT_UNROLL_MN, optimizations for syrk  
							
							
							
						 
						
							2014-07-24 18:43:31 +02:00  
				
					
						
							
							
								 
						
							
								99efbbbad5 
								
							 
						 
						
							
							
								
								Fixed   #395 . Enable optimized cgemm for Sandybridge. Added optimized sdot kernel.  
							
							... 
							
							
							
							Fixed c/zgemm, zgemv computational error of haswell, piledriver, bulldozer, and
barcelona on Windows.
Merge branch 'develop' of https://github.com/wernsaar/OpenBLAS  into wernsaar-develop
Conflicts:
	kernel/Makefile.L1
	kernel/x86_64/KERNEL
	param.h 
							
						 
						
							2014-06-29 10:34:51 +08:00  
				
					
						
							
							
								 
						
							
								6c2ead30f0 
								
							 
						 
						
							
							
								
								Remove all trailing whitespace except lapack-netlib  
							
							... 
							
							
							
							Signed-off-by: Timothy Gu <timothygu99@gmail.com> 
							
						 
						
							2014-06-27 12:05:18 -07:00  
				
					
						
							
							
								 
						
							
								365e8de346 
								
							 
						 
						
							
							
								
								added optimized cgemm-kernel for SANDYBRIDGE  
							
							
							
						 
						
							2014-06-27 13:40:29 +02:00  
				
					
						
							
							
								 
						
							
								dabab2b5f4 
								
							 
						 
						
							
							
								
								added new optimized sgemm kernel for SANDYBRIDGE  
							
							
							
						 
						
							2014-06-26 21:42:08 +02:00  
				
					
						
							
							
								 
						
							
								aa2709c4e0 
								
							 
						 
						
							
							
								
								enabled optimized dgemm kernel for NEHALEM  
							
							
							
						 
						
							2014-06-26 12:22:29 +02:00  
				
					
						
							
							
								 
						
							
								d83373db61 
								
							 
						 
						
							
							
								
								added parameter for gemm3m kernels  
							
							
							
						 
						
							2014-06-25 10:40:25 +02:00  
				
					
						
							
							
								 
						
							
								43fbdb7a5a 
								
							 
						 
						
							
							
								
								added ARMV5 as reference platform  
							
							
							
						 
						
							2014-05-13 17:25:19 +02:00  
				
					
						
							
							
								 
						
							
								5f3b68b4d4 
								
							 
						 
						
							
							
								
								replaced sgemm and cgemm kernels because lapack bugs  
							
							
							
						 
						
							2014-05-10 11:24:07 +02:00  
				
					
						
							
							
								 
						
							
								2424af62fd 
								
							 
						 
						
							
							
								
								replaced dgemm-kernel because bug in lapack  
							
							
							
						 
						
							2014-05-10 10:52:37 +02:00  
				
					
						
							
							
								 
						
							
								47b22763f8 
								
							 
						 
						
							
							
								
								reduced stack usage on windows to 16K  
							
							
							
						 
						
							2014-04-24 14:09:26 +02:00  
				
					
						
							
							
								 
						
							
								aae75b2461 
								
							 
						 
						
							
							
								
								modified param.h  
							
							
							
						 
						
							2013-12-01 18:43:24 +01:00  
				
					
						
							
							
								 
						
							
								b3254eecaf 
								
							 
						 
						
							
							
								
								Merge remote branch 'origin/haswell' into develop  
							
							
							
						 
						
							2013-12-01 18:09:12 +01:00  
				
					
						
							
							
								 
						
							
								ecbc85b954 
								
							 
						 
						
							
							
								
								modified param.h  
							
							
							
						 
						
							2013-12-01 17:54:53 +01:00  
				
					
						
							
							
								 
						
							
								afe44b0241 
								
							 
						 
						
							
							
								
								tests and code cleanup of gemm_kernels for HASWELL  
							
							
							
						 
						
							2013-10-28 14:23:48 +01:00  
				
					
						
							
							
								 
						
							
								a77c71eaf5 
								
							 
						 
						
							
							
								
								added highly optimized dgemm_kernel for HASWELL  
							
							
							
						 
						
							2013-10-28 10:23:47 +01:00  
				
					
						
							
							
								 
						
							
								fe8c5666f9 
								
							 
						 
						
							
							
								
								optimized dgemm_kernel for HASWELL  
							
							
							
						 
						
							2013-10-20 16:52:26 +02:00  
				
					
						
							
							
								 
						
							
								2638370844 
								
							 
						 
						
							
							
								
								Init code base for Intel Haswell.  
							
							
							
						 
						
							2013-08-13 00:54:59 +08:00  
				
					
						
							
							
								 
						
							
								886cbaf4e4 
								
							 
						 
						
							
							
								
								Support AMD Piledriver by bulldozer kernels.  
							
							
							
						 
						
							2013-07-06 12:06:43 -03:00  
				
					
						
							
							
								 
						
							
								6e8501c8a1 
								
							 
						 
						
							
							
								
								Fixed   #239  bug in param.h about BARCELONA and BULLDOZER.  
							
							
							
						 
						
							2013-06-29 10:36:01 +08:00  
				
					
						
							
							
								 
						
							
								f67fa62851 
								
							 
						 
						
							
							
								
								added dgemv_n_bulldozer.S  
							
							
							
						 
						
							2013-06-15 16:42:37 +02:00  
				
					
						
							
							
								 
						
							
								d65bbec99b 
								
							 
						 
						
							
							
								
								added new sgemm kernel for BULLDOZER  
							
							
							
						 
						
							2013-06-09 15:57:42 +02:00  
				
					
						
							
							
								 
						
							
								ba800f0883 
								
							 
						 
						
							
							
								
								correct GEMM_THREAD in param.h  
							
							
							
						 
						
							2013-06-08 10:03:59 +02:00  
				
					
						
							
							
								 
						
							
								25491e42f9 
								
							 
						 
						
							
							
								
								New dgemm kernel for BULLDOZER: dgemm_kernel_8x2_bulldozer.S  
							
							
							
						 
						
							2013-06-08 09:40:17 +02:00  
				
					
						
							
							
								 
						
							
								731220f870 
								
							 
						 
						
							
							
								
								changed DGEMM_DEFAULT_P and DGEMM_DEFAULT_Q to 248 for BULLDOZER 64bit  
							
							
							
						 
						
							2013-04-30 10:07:17 +02:00  
				
					
						
							
							
								 
						
							
								b7c0fa6bd2 
								
							 
						 
						
							
							
								
								Init AMD Bulldozer codebase.  
							
							
							
						 
						
							2012-12-06 07:29:54 -05:00  
				
					
						
							
							
								 
						
							
								01e3c984ce 
								
							 
						 
						
							
							
								
								Fix compilation with TARGET=GENERIC  
							
							... 
							
							
							
							Patch applied to Debian package 
							
						 
						
							2012-11-14 21:04:05 +01:00  
				
					
						
							
							
								 
						
							
								3692b4d631 
								
							 
						 
						
							
							
								
								Improve the detection of sparc  
							
							
							
						 
						
							2012-07-02 02:51:38 +02:00  
				
					
						
							
							
								 
						
							
								b39c51195b 
								
							 
						 
						
							
							
								
								Fixed the build bug about Sandy Bridge on 32-bit.  
							
							... 
							
							
							
							We used Nehalem/Penryn codes on Sandy Bridge 32-bit. 
							
						 
						
							2012-06-25 14:29:17 +08:00  
				
					
						
							
							
								 
						
							
								996dc6d1c8 
								
							 
						 
						
							
							
								
								Fixed dynamic_arch building bug.  
							
							
							
						 
						
							2012-06-19 17:29:06 +08:00  
				
					
						
							
							
								 
						
							
								f76f952547 
								
							 
						 
						
							
							
								
								Refs  #83   #53 . Adding Intel Sandy Bridge (AVX supported) kernel codes for BLAS level 3 functions.  
							
							
							
						 
						
							2012-06-19 16:37:12 +08:00  
				
					
						
							
							
								 
						
							
								d3b67d0bd8 
								
							 
						 
						
							
							
								
								Refs  #113 . Fixed the typo BOBCATE -> BOBCAT  
							
							
							
						 
						
							2012-05-31 22:40:15 +08:00  
				
					
						
							
							
								 
						
							
								d6cab3f37e 
								
							 
						 
						
							
							
								
								Refs  #113 . Support AMD Bobcate using Barcelona kernel codes. Replace 3DNow! with MMX.  
							
							
							
						 
						
							2012-05-31 18:17:45 +08:00  
				
					
						
							
							
								 
						
							
								19a48b82cf 
								
							 
						 
						
							
							
								
								Init Sandybridge codes based on Nehalem.  
							
							
							
						 
						
							2012-03-30 20:01:03 +08:00  
				
					
						
							
							
								 
						
							
								7af0139a09 
								
							 
						 
						
							
							
								
								Modify P Q R size of Loongson3b.  
							
							
							
						 
						
							2012-01-11 16:05:39 +00:00  
				
					
						
							
							
								 
						
							
								66904fc4e8 
								
							 
						 
						
							
							
								
								BLAS3 used standard MIPS instructions without extensions on Loongson 3B.  
							
							
							
						 
						
							2011-11-25 11:20:25 +00:00  
				
					
						
							
							
								 
						
							
								8163ab7e55 
								
							 
						 
						
							
							
								
								Change the block size on Loongson 3B.  
							
							
							
						 
						
							2011-11-23 18:41:49 +00:00  
				
					
						
							
							
								 
						
							
								b95ad4cfaf 
								
							 
						 
						
							
							
								
								Support detecting ICT Loongson-3B CPU.  
							
							
							
						 
						
							2011-11-09 19:29:50 +00:00  
				
					
						
							
							
								 
						
							
								831858b883 
								
							 
						 
						
							
							
								
								Modify aligned address of sa and sb to improve the performance of multi-threads.  
							
							
							
						 
						
							2011-09-23 20:59:48 +00:00  
				
					
						
							
							
								 
						
							
								d238a768ab 
								
							 
						 
						
							
							
								
								Use ps instructions in cgemm.  
							
							
							
						 
						
							2011-09-14 15:32:25 +00:00  
				
					
						
							
							
								 
						
							
								4727fe8abf 
								
							 
						 
						
							
							
								
								Refs  #47 . On Loongson 3A, set DGEMM_R parameter depending on different number of threads. It would improve double precision BLAS3 on multi-threads.  
							
							
							
						 
						
							2011-09-05 15:13:52 +00:00  
				
					
						
							
							
								 
						
							
								74a3f63489 
								
							 
						 
						
							
							
								
								Tuning mb, kb, nb size to get the best performance.  
							
							
							
						 
						
							2011-09-01 17:15:28 +00:00  
				
					
						
							
							
								 
						
							
								cb0214787b 
								
							 
						 
						
							
							
								
								Modify compile options.  
							
							
							
						 
						
							2011-08-30 20:57:00 +00:00  
				
					
						
							
							
								 
						
							
								c8360e3ae5 
								
							 
						 
						
							
							
								
								Complete all the complex single-precision level-3 functions on Loongson3a; the performance is 2.3 GFlops.  
							
							
							
						 
						
							2011-07-18 17:03:38 +00:00  
				
					
						
							
							
								 
						
							
								e72113f06a 
								
							 
						 
						
							
							
								
								Add ztrmm and ztrsm part on loongson3a. The average performance is 2.2G.  
							
							
							
						 
						
							2011-06-23 21:11:00 +00:00  
				
					
						
							
							
								 
						
							
								1c96d345e2 
								
							 
						 
						
							
							
								
								Improve zgemm performance from 1G to 1.8G, change block size in param.h.  
							
							
							
						 
						
							2011-06-21 22:16:23 +00:00  
				
					
						
							
							
								 
						
							
								88d94d0ec8 
								
							 
						 
						
							
							
								
								Fixed   #30  strmm computational error on Loongson3A.  
							
							
							
						 
						
							2011-05-28 09:48:34 +00:00  
				
					
						
							
							
								 
						
							
								ab9e4ce351 
								
							 
						 
						
							
							
								
								Adjust kc size from 112 to 116 .  
							
							
							
						 
						
							2011-04-11 22:17:57 +00:00  
				
					
						
							
							
								 
						
							
								1aa9a298e1 
								
							 
						 
						
							
							
								
								Change BLOCK SIZE of LOONGSON3A TARGET.  
							
							
							
						 
						
							2011-04-06 10:39:31 +00:00  
				
					
						
							
							
								 
						
							
								0597c1076f 
								
							 
						 
						
							
							
								
								Added the configures of loongson 3a. refs  #1  
							
							
							
						 
						
							2011-01-24 22:45:35 +00:00  
				
					
						
							
							
								 
						
							
								342bbc3871 
								
							 
						 
						
							
							
								
								Import GotoBLAS2 1.13 BSD version codes.  
							
							
							
						 
						
							2011-01-24 14:54:24 +00:00