Abdurrauf
							
						 
						
							 
							
							
							
							
								
							
							
								08786c4b95 
								
							 
						 
						
							
							
								
								strmm and ctrmm  
							
							 
							
							
							
						 
						
							2017-03-13 01:23:16 +04:00  
						
					 
				
					
						
							
							
								 
								Abdurrauf
							
						 
						
							 
							
							
							
							
								
							
							
								82e80fa82b 
								
							 
						 
						
							
							
								
								initial strmm(sgemm). not tuned yet  
							
							 
							
							
							
						 
						
							2017-03-06 04:27:40 +04:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
							
							
								
							
							
								ffc1d6c468 
								
							 
						 
						
							
							
								
								Merge pull request  #1108  from ashwinyes/develop_20170203_thunderx2t99  
							
							 
							
							... 
							
							
							
							Optimized Implementations for ThunderX2T99 
							
						 
						
							2017-02-28 16:02:19 +01:00  
						
					 
				
					
						
							
							
								 
								Ashwin Sekhar T K
							
						 
						
							 
							
							
							
							
								
							
							
								19ba133383 
								
							 
						 
						
							
							
								
								THUNDERX2T99: Add Optimized ZGEMM Implementation  
							
							 
							
							
							
						 
						
							2017-02-28 05:31:41 +00:00  
						
					 
				
					
						
							
							
								 
								Abdurrauf
							
						 
						
							 
							
							
							
							
								
							
							
								0d96b0e2a7 
								
							 
						 
						
							
							
								
								Merge branch 'z13' into develop  
							
							 
							
							
							
						 
						
							2017-02-26 06:17:33 +04:00  
						
					 
				
					
						
							
							
								 
								Abdurrauf
							
						 
						
							 
							
							
							
							
								
							
							
								848cb27b1e 
								
							 
						 
						
							
							
								
								ztrmm kernel.  
							
							 
							
							
							
						 
						
							2017-02-26 06:14:12 +04:00  
						
					 
				
					
						
							
							
								 
								Ashwin Sekhar T K
							
						 
						
							 
							
							
							
							
								
							
							
								2757b49767 
								
							 
						 
						
							
							
								
								THUNDERX2T99: Add Optimized CGEMM Implementation  
							
							 
							
							
							
						 
						
							2017-01-30 17:44:26 +05:30  
						
					 
				
					
						
							
							
								 
								Ashwin Sekhar T K
							
						 
						
							 
							
							
							
							
								
							
							
								f279ff4789 
								
							 
						 
						
							
							
								
								THUNDERX2T99: Add Optimized SGEMM Implementation  
							
							 
							
							
							
						 
						
							2017-01-16 21:44:33 +05:30  
						
					 
				
					
						
							
							
								 
								Ashwin Sekhar T K
							
						 
						
							 
							
							
							
							
								
							
							
								4b55fae337 
								
							 
						 
						
							
							
								
								ARM64: Add Cavium THUNDERX2T99 Target  
							
							 
							
							
							
						 
						
							2017-01-11 11:18:40 +05:30  
						
					 
				
					
						
							
							
								 
								Andrew Pinski
							
						 
						
							 
							
							
							
							
								
							
							
								fb200c7245 
								
							 
						 
						
							
							
								
								ARM64: Add Cavium THUNDERX Target  
							
							 
							
							
							
						 
						
							2017-01-10 15:01:37 +05:30  
						
					 
				
					
						
							
							
								 
								Ashwin Sekhar T K
							
						 
						
							 
							
							
							
							
								
							
							
								4713e7c47f 
								
							 
						 
						
							
							
								
								ARM64: Add the VULCAN Target  
							
							 
							
							
							
						 
						
							2017-01-10 15:01:17 +05:30  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								b678471d65 
								
							 
						 
						
							
							
								
								Merge branch 'z13' into develop  
							
							 
							
							... 
							
							
							
							Conflicts:
	CONTRIBUTORS.md 
							
						 
						
							2017-01-09 05:52:42 -05:00  
						
					 
				
					
						
							
							
								 
								Abdurrauf
							
						 
						
							 
							
							
							
							
								
							
							
								6418667818 
								
							 
						 
						
							
							
								
								dtrmm and dgemm for z13  
							
							 
							
							
							
						 
						
							2017-01-04 19:32:33 +04:00  
						
					 
				
					
						
							
							
								 
								Shivraj Patil
							
						 
						
							 
							
							
							
							
								
							
							
								9687437928 
								
							 
						 
						
							
							
								
								MIPS n32 ABI and build time mips simd support check  
							
							 
							
							... 
							
							
							
							Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> 
							
						 
						
							2016-08-10 17:44:22 +05:30  
						
					 
				
					
						
							
							
								 
								Shivraj Patil
							
						 
						
							 
							
							
							
							
								
							
							
								d1c6469283 
								
							 
						 
						
							
							
								
								MIPS n32 ABI support, MSA support detection and rename ARCH, ARCHFLAGS  
							
							 
							
							... 
							
							
							
							Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> 
							
						 
						
							2016-08-08 11:58:01 +05:30  
						
					 
				
					
						
							
							
								 
								Shivraj Patil
							
						 
						
							 
							
							
							
							
								
							
							
								beb1d076a4 
								
							 
						 
						
							
							
								
								Added MSA optimization for GEMV_N, GEMV_T, ASUM, DOT functions  
							
							 
							
							... 
							
							
							
							Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> 
							
						 
						
							2016-07-15 18:38:25 +05:30  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								8a592ee386 
								
							 
						 
						
							
							
								
								Merge pull request  #924  from ashwinyes/develop_aarch64_improvements_20160714  
							
							 
							
							... 
							
							
							
							Improvements to Aarch64 kernels 
							
						 
						
							2016-07-14 15:47:55 -04:00  
						
					 
				
					
						
							
							
								 
								Ashwin Sekhar T K
							
						 
						
							 
							
							
							
							
								
							
							
								0a5ff9f9f9 
								
							 
						 
						
							
							
								
								Improvements to TRMM and GEMM kernels  
							
							 
							
							
							
						 
						
							2016-07-14 13:56:04 +05:30  
						
					 
				
					
						
							
							
								 
								Shivraj Patil
							
						 
						
							 
							
							
							
							
								
							
							
								57df7956ee 
								
							 
						 
						
							
							
								
								Added CGEMM, ZGEMM, STRMM, DTRMM, CTRMM, ZTRMM. Updated macros in SGEMM, DGEMM, STRMM.  
							
							 
							
							... 
							
							
							
							Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> 
							
						 
						
							2016-06-28 17:51:10 +05:30  
						
					 
				
					
						
							
							
								 
								Shivraj Patil
							
						 
						
							 
							
							
							
							
								
							
							
								c4ba40e308 
								
							 
						 
						
							
							
								
								SGEMM optimization for MIPS P5600 and I6400 using MSA. Unrolled k loop in DGEMM kernel function  
							
							 
							
							... 
							
							
							
							Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> 
							
						 
						
							2016-05-19 11:04:42 +05:30  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								88011f625d 
								
							 
						 
						
							
							
								
								Merge pull request  #876  from wernsaar/develop  
							
							 
							
							... 
							
							
							
							optimized dgemm on power8 for 20 threads 
							
						 
						
							2016-05-16 14:52:40 +02:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								8310d4d3f7 
								
							 
						 
						
							
							
								
								optimized dgemm for 20 threads  
							
							 
							
							
							
						 
						
							2016-05-16 14:14:25 +02:00  
						
					 
				
					
						
							
							
								 
								Shivraj Patil
							
						 
						
							 
							
							
							
							
								
							
							
								085cf236c2 
								
							 
						 
						
							
							
								
								conflict resolved by syncing with 'xianyi:develop'  
							
							 
							
							... 
							
							
							
							Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> 
							
						 
						
							2016-05-04 11:07:14 +05:30  
						
					 
				
					
						
							
							
								 
								Shivraj Patil
							
						 
						
							 
							
							
							
							
								
							
							
								b7b3d8ec8e 
								
							 
						 
						
							
							
								
								DGEMM optimization for MIPS P5600 and I6400 using MSA  
							
							 
							
							... 
							
							
							
							Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> 
							
						 
						
							2016-05-03 14:42:26 +05:30  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								cd7af5260a 
								
							 
						 
						
							
							
								
								Merge pull request  #847  from sva-img/develop  
							
							 
							
							... 
							
							
							
							MIPS P5600(32 bit) and I6400(64 bit) cores support added. 
							
						 
						
							2016-04-29 11:44:36 -04:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								782f75ba94 
								
							 
						 
						
							
							
								
								optimized param.h for POWER8  
							
							 
							
							
							
						 
						
							2016-04-27 15:48:09 +02:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								0d0c6f7d7d 
								
							 
						 
						
							
							
								
								optimized dgemm for POWER8  
							
							 
							
							
							
						 
						
							2016-04-27 14:01:08 +02:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								40ac64ae4f 
								
							 
						 
						
							
							
								
								updated param.h for EXCAVATOR  
							
							 
							
							
							
						 
						
							2016-04-25 10:40:04 +02:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								089aad57f7 
								
							 
						 
						
							
							
								
								updated param.h for POWER8  
							
							 
							
							
							
						 
						
							2016-04-23 14:26:24 +02:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								879a51165f 
								
							 
						 
						
							
							
								
								Optimized zgemm and tested zgemm again  
							
							 
							
							
							
						 
						
							2016-04-22 13:07:12 +02:00  
						
					 
				
					
						
							
							
								 
								Shivraj Patil
							
						 
						
							 
							
							
							
							
								
							
							
								2c3dfe2bf3 
								
							 
						 
						
							
							
								
								MIPS P5600(32 bit) and I6400(64 bit) cores support added.  
							
							 
							
							... 
							
							
							
							Separated mips and mips64 files.
Configurations support for mips 32 bit.
Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> 
							
						 
						
							2016-04-22 14:03:18 +05:30  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								3c6294ca3d 
								
							 
						 
						
							
							
								
								added optimized sgemm_tcopy for power8  
							
							 
							
							
							
						 
						
							2016-04-19 16:08:54 +02:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								dd43661cfd 
								
							 
						 
						
							
							
								
								Init IBM z system (s390x) porting.  
							
							 
							
							
							
						 
						
							2016-04-15 18:02:24 -04:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								e173c51c04 
								
							 
						 
						
							
							
								
								updated zgemm- and ztrmm-kernel for POWER8  
							
							 
							
							
							
						 
						
							2016-04-08 09:05:37 +02:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								9c42f0374a 
								
							 
						 
						
							
							
								
								Updated cgemm- and sgemm-kernel for POWER8 SMP  
							
							 
							
							
							
						 
						
							2016-04-07 15:08:15 +02:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								a51102e9b7 
								
							 
						 
						
							
							
								
								bugfixes for sgemm- and cgemm-kernel  
							
							 
							
							
							
						 
						
							2016-04-06 11:15:21 +02:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								c5b1fbcb2e 
								
							 
						 
						
							
							
								
								updated optimized cgemm- and ctrmm-kernel for POWER8  
							
							 
							
							
							
						 
						
							2016-04-04 09:12:08 +02:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								6a9bbfc227 
								
							 
						 
						
							
							
								
								updated sgemm- and strmm-kernel for POWER8  
							
							 
							
							
							
						 
						
							2016-04-02 17:16:36 +02:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								e1df5a6e23 
								
							 
						 
						
							
							
								
								fixed sgemm- and strmm-kernel  
							
							 
							
							
							
						 
						
							2016-03-18 12:12:03 +01:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								5c658f8746 
								
							 
						 
						
							
							
								
								add optimized cgemm- and ctrmm-kernel for POWER8  
							
							 
							
							
							
						 
						
							2016-03-18 08:17:25 +01:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								96284ab295 
								
							 
						 
						
							
							
								
								added sgemm- and strmm-kernel for POWER8  
							
							 
							
							
							
						 
						
							2016-03-14 13:52:44 +01:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								91e1c5080c 
								
							 
						 
						
							
							
								
								modified configuration, to use power6 sgemm kernel for power8  
							
							 
							
							
							
						 
						
							2016-03-04 13:38:57 +01:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								b752858d6c 
								
							 
						 
						
							
							
								
								added dgemm-, dtrmm-, zgemm- and ztrmm-kernel for power8  
							
							 
							
							
							
						 
						
							2016-03-01 07:33:56 +01:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								3e8d6ea74f 
								
							 
						 
						
							
							
								
								Init POWER8 kernels by POWER6.  
							
							 
							
							
							
						 
						
							2015-11-03 12:34:23 +08:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								b07d733a71 
								
							 
						 
						
							
							
								
								added updates for syrk and syr2k  
							
							 
							
							
							
						 
						
							2016-01-21 13:16:44 +01:00  
						
					 
				
					
						
							
							
								 
								Ashwin Sekhar T K
							
						 
						
							 
							
							
							
							
								
							
							
								39937d15cd 
								
							 
						 
						
							
							
								
								Change BUFFER_SIZE for Cortex A57 to 20 MB  
							
							 
							
							... 
							
							
							
							Change the GEMM_P, GEMM_Q, GEMM_R values for Cortex A57 
							
						 
						
							2015-11-20 01:12:04 +05:30  
						
					 
				
					
						
							
							
								 
								Ashwin Sekhar T K
							
						 
						
							 
							
							
							
							
								
							
							
								1397b47197 
								
							 
						 
						
							
							
								
								Optimized zgemm kernel for CORTEXA57  
							
							 
							
							
							
						 
						
							2015-11-09 14:15:53 +05:30  
						
					 
				
					
						
							
							
								 
								Ashwin Sekhar T K
							
						 
						
							 
							
							
							
							
								
							
							
								45f78963ac 
								
							 
						 
						
							
							
								
								Optimized cgemm kernel for CORTEXA57  
							
							 
							
							... 
							
							
							
							Also, add a generic ztrmm 4x4 kernel 
							
						 
						
							2015-11-09 14:15:53 +05:30  
						
					 
				
					
						
							
							
								 
								Ashwin Sekhar T K
							
						 
						
							 
							
							
							
							
								
							
							
								402443bf9c 
								
							 
						 
						
							
							
								
								Optimized dgemm kernel for CORTEXA57  
							
							 
							
							
							
						 
						
							2015-11-09 14:15:53 +05:30  
						
					 
				
					
						
							
							
								 
								Ashwin Sekhar T K
							
						 
						
							 
							
							
							
							
								
							
							
								f2f8a0fe8b 
								
							 
						 
						
							
							
								
								Adding arm64 target CORTEXA57  
							
							 
							
							... 
							
							
							
							Co-Authored-By: Ralph Campbell <ralph.campbell@broadcom.com> 
							
						 
						
							2015-11-09 14:15:50 +05:30  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								9bd962f655 
								
							 
						 
						
							
							
								
								modified haswell parameter dgemm_unroll_n  
							
							 
							
							
							
						 
						
							2015-06-13 10:28:27 +02:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								51ff17d46e 
								
							 
						 
						
							
							
								
								Add AMD Excavator target.  
							
							 
							
							
							
						 
						
							2015-05-13 16:16:30 -05:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								229ce2ccd1 
								
							 
						 
						
							
							
								
								Add cortex-a9 and cortex-a15 targets.  
							
							 
							
							
							
						 
						
							2015-01-12 08:55:29 +00:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								ddf983d643 
								
							 
						 
						
							
							
								
								added optimizations for steamroller  
							
							 
							
							
							
						 
						
							2014-12-30 20:14:45 +08:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								4319769b79 
								
							 
						 
						
							
							
								
								added target processor STEAMROLLER  
							
							 
							
							
							
						 
						
							2014-12-28 20:16:46 +08:00  
						
					 
				
					
						
							
							
								 
								Werner Saar
							
						 
						
							 
							
							
							
							
								
							
							
								587e16fba3 
								
							 
						 
						
							
							
								
								Ref  #458 : Backport, sandybridge uses nehalem zgemm kernel  
							
							 
							
							
							
						 
						
							2014-12-22 17:01:18 +01:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								2fb02626da 
								
							 
						 
						
							
							
								
								Update organization info.  
							
							 
							
							
							
						 
						
							2014-11-25 15:28:58 +08:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								a85c2785ae 
								
							 
						 
						
							
							
								
								Refs  #467 . Added generic kernel file for x86_64.  
							
							 
							
							
							
						 
						
							2014-11-24 15:34:48 +08:00  
						
					 
				
					
						
							
							
								 
								Benedikt Huber
							
						 
						
							 
							
							
							
							
								
							
							
								58c90d5937 
								
							 
						 
						
							
							
								
								# The first commit's message is:  
							
							 
							
							... 
							
							
							
							Optimizations for APM's xgene-1 (aarch64).
1) general system updates to support armv8 better.  Make all did not work, one needed to supply TARGET=ARMV8.
2) sgemm 4x4 kernel in assembler using SIMD, and configuration changes to use it.
3) strmm 4x4 kernel in C.  Since the sgemm kernel does 4x4, the trmm kernel must also do 4xN.
Added Dave Nuechterlein to the contributors list. 
							
						 
						
							2014-11-11 22:19:23 +08:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								9d7057366d 
								
							 
						 
						
							
							
								
								bugfix for GEMM3M functions  
							
							 
							
							
							
						 
						
							2014-09-21 11:41:43 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								7aae4a62e7 
								
							 
						 
						
							
							
								
								enabled use of GEMM3M functions  
							
							 
							
							
							
						 
						
							2014-09-20 14:27:10 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								5087096711 
								
							 
						 
						
							
							
								
								optimization of sandybridge cgemm-kernel  
							
							 
							
							
							
						 
						
							2014-07-29 19:07:21 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								1cc02b4337 
								
							 
						 
						
							
							
								
								optimized sgemm kernel for haswell  
							
							 
							
							
							
						 
						
							2014-07-28 11:50:01 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								125610d23b 
								
							 
						 
						
							
							
								
								allow to set custom value for ?GEMM_DEFAULT_UNROLL_MN, optimizations for syrk  
							
							 
							
							
							
						 
						
							2014-07-24 18:43:31 +02:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								99efbbbad5 
								
							 
						 
						
							
							
								
								Fixed   #395 . Enable optimized cgemm for Sandybridge. Added optimized sdot kernel.  
							
							 
							
							... 
							
							
							
							Fixed c/zgemm, zgemv computational error of haswell, piledriver, bulldozer, and
barcelona on Windows.
Merge branch 'develop' of https://github.com/wernsaar/OpenBLAS  into wernsaar-develop
Conflicts:
	kernel/Makefile.L1
	kernel/x86_64/KERNEL
	param.h 
							
						 
						
							2014-06-29 10:34:51 +08:00  
						
					 
				
					
						
							
							
								 
								Timothy Gu
							
						 
						
							 
							
							
							
							
								
							
							
								6c2ead30f0 
								
							 
						 
						
							
							
								
								Remove all trailing whitespace except lapack-netlib  
							
							 
							
							... 
							
							
							
							Signed-off-by: Timothy Gu <timothygu99@gmail.com> 
							
						 
						
							2014-06-27 12:05:18 -07:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								365e8de346 
								
							 
						 
						
							
							
								
								added optimized cgemm-kernel for SANDYBRIDGE  
							
							 
							
							
							
						 
						
							2014-06-27 13:40:29 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								dabab2b5f4 
								
							 
						 
						
							
							
								
								added new optimized sgemm kernel for SANDYBRIDGE  
							
							 
							
							
							
						 
						
							2014-06-26 21:42:08 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								aa2709c4e0 
								
							 
						 
						
							
							
								
								enabled optimized dgemm kernel for NEHALEM  
							
							 
							
							
							
						 
						
							2014-06-26 12:22:29 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								d83373db61 
								
							 
						 
						
							
							
								
								added parameter for gemm3m kernels  
							
							 
							
							
							
						 
						
							2014-06-25 10:40:25 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								43fbdb7a5a 
								
							 
						 
						
							
							
								
								added ARMV5 as reference platform  
							
							 
							
							
							
						 
						
							2014-05-13 17:25:19 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								5f3b68b4d4 
								
							 
						 
						
							
							
								
								replaced sgemm and cgemm kernels because lapack bugs  
							
							 
							
							
							
						 
						
							2014-05-10 11:24:07 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								2424af62fd 
								
							 
						 
						
							
							
								
								replaced dgemm-kernel because bug in lapack  
							
							 
							
							
							
						 
						
							2014-05-10 10:52:37 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								47b22763f8 
								
							 
						 
						
							
							
								
								reduced stack usage on windows to 16K  
							
							 
							
							
							
						 
						
							2014-04-24 14:09:26 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								aae75b2461 
								
							 
						 
						
							
							
								
								modified param.h  
							
							 
							
							
							
						 
						
							2013-12-01 18:43:24 +01:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								b3254eecaf 
								
							 
						 
						
							
							
								
								Merge remote branch 'origin/haswell' into develop  
							
							 
							
							
							
						 
						
							2013-12-01 18:09:12 +01:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								ecbc85b954 
								
							 
						 
						
							
							
								
								modified param.h  
							
							 
							
							
							
						 
						
							2013-12-01 17:54:53 +01:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								afe44b0241 
								
							 
						 
						
							
							
								
								tests and code cleanup of gemm_kernels for HASWELL  
							
							 
							
							
							
						 
						
							2013-10-28 14:23:48 +01:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								a77c71eaf5 
								
							 
						 
						
							
							
								
								added highly optimized dgemm_kernel for HASWELL  
							
							 
							
							
							
						 
						
							2013-10-28 10:23:47 +01:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								fe8c5666f9 
								
							 
						 
						
							
							
								
								optimized dgemm_kernel for HASWELL  
							
							 
							
							
							
						 
						
							2013-10-20 16:52:26 +02:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								2638370844 
								
							 
						 
						
							
							
								
								Init code base for Intel Haswell.  
							
							 
							
							
							
						 
						
							2013-08-13 00:54:59 +08:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								886cbaf4e4 
								
							 
						 
						
							
							
								
								Support AMD Piledriver by bulldozer kernels.  
							
							 
							
							
							
						 
						
							2013-07-06 12:06:43 -03:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								6e8501c8a1 
								
							 
						 
						
							
							
								
								Fixed   #239  bug in param.h about BARCELONA and BULLDOZER.  
							
							 
							
							
							
						 
						
							2013-06-29 10:36:01 +08:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								f67fa62851 
								
							 
						 
						
							
							
								
								added dgemv_n_bulldozer.S  
							
							 
							
							
							
						 
						
							2013-06-15 16:42:37 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								d65bbec99b 
								
							 
						 
						
							
							
								
								added new sgemm kernel for BULLDOZER  
							
							 
							
							
							
						 
						
							2013-06-09 15:57:42 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								ba800f0883 
								
							 
						 
						
							
							
								
								correct GEMM_THREAD in param.h  
							
							 
							
							
							
						 
						
							2013-06-08 10:03:59 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								25491e42f9 
								
							 
						 
						
							
							
								
								New dgemm kernel for BULLDOZER: dgemm_kernel_8x2_bulldozer.S  
							
							 
							
							
							
						 
						
							2013-06-08 09:40:17 +02:00  
						
					 
				
					
						
							
							
								 
								wernsaar
							
						 
						
							 
							
							
							
							
								
							
							
								731220f870 
								
							 
						 
						
							
							
								
								changed DGEMM_DEFAULT_P and DGEMM_DEFAULT_Q to 248 for BULLDOZER 64bit  
							
							 
							
							
							
						 
						
							2013-04-30 10:07:17 +02:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								b7c0fa6bd2 
								
							 
						 
						
							
							
								
								Init AMD Bulldozer codebase.  
							
							 
							
							
							
						 
						
							2012-12-06 07:29:54 -05:00  
						
					 
				
					
						
							
							
								 
								Sébastien Villemot
							
						 
						
							 
							
							
							
							
								
							
							
								01e3c984ce 
								
							 
						 
						
							
							
								
								Fix compilation with TARGET=GENERIC  
							
							 
							
							... 
							
							
							
							Patch applied to Debian package 
							
						 
						
							2012-11-14 21:04:05 +01:00  
						
					 
				
					
						
							
							
								 
								Sylvestre Ledru
							
						 
						
							 
							
							
							
							
								
							
							
								3692b4d631 
								
							 
						 
						
							
							
								
								Improve the detection of sparc  
							
							 
							
							
							
						 
						
							2012-07-02 02:51:38 +02:00  
						
					 
				
					
						
							
							
								 
								Xianyi Zhang
							
						 
						
							 
							
							
							
							
								
							
							
								b39c51195b 
								
							 
						 
						
							
							
								
								Fixed the build bug about Sandy Bridge on 32-bit.  
							
							 
							
							... 
							
							
							
							We used Nehalem/Penryn codes on Sandy Bridge 32-bit. 
							
						 
						
							2012-06-25 14:29:17 +08:00  
						
					 
				
					
						
							
							
								 
								Xianyi Zhang
							
						 
						
							 
							
							
							
							
								
							
							
								996dc6d1c8 
								
							 
						 
						
							
							
								
								Fixed dynamic_arch building bug.  
							
							 
							
							
							
						 
						
							2012-06-19 17:29:06 +08:00  
						
					 
				
					
						
							
							
								 
								wangqian
							
						 
						
							 
							
							
							
							
								
							
							
								f76f952547 
								
							 
						 
						
							
							
								
								Refs  #83   #53 . Adding Intel Sandy Bridge (AVX supported) kernel codes for BLAS level 3 functions.  
							
							 
							
							
							
						 
						
							2012-06-19 16:37:12 +08:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								d3b67d0bd8 
								
							 
						 
						
							
							
								
								Refs  #113 . Fixed the typo BOBCATE -> BOBCAT  
							
							 
							
							
							
						 
						
							2012-05-31 22:40:15 +08:00  
						
					 
				
					
						
							
							
								 
								Zhang Xianyi
							
						 
						
							 
							
							
							
							
								
							
							
								d6cab3f37e 
								
							 
						 
						
							
							
								
								Refs  #113 . Support AMD Bobcate using Barcelona kernel codes. Replace 3DNow! with MMX.  
							
							 
							
							
							
						 
						
							2012-05-31 18:17:45 +08:00  
						
					 
				
					
						
							
							
								 
								Xianyi Zhang
							
						 
						
							 
							
							
							
							
								
							
							
								19a48b82cf 
								
							 
						 
						
							
							
								
								Init Sandybridge codes based on Nehalem.  
							
							 
							
							
							
						 
						
							2012-03-30 20:01:03 +08:00  
						
					 
				
					
						
							
							
								 
								traz
							
						 
						
							 
							
							
							
							
								
							
							
								7af0139a09 
								
							 
						 
						
							
							
								
								Modify P Q R size of Loongson3b.  
							
							 
							
							
							
						 
						
							2012-01-11 16:05:39 +00:00  
						
					 
				
					
						
							
							
								 
								Wang Qian
							
						 
						
							 
							
							
							
							
								
							
							
								66904fc4e8 
								
							 
						 
						
							
							
								
								BLAS3 used standard MIPS instructions without extensions on Loongson 3B.  
							
							 
							
							
							
						 
						
							2011-11-25 11:20:25 +00:00  
						
					 
				
					
						
							
							
								 
								Wang Qian
							
						 
						
							 
							
							
							
							
								
							
							
								8163ab7e55 
								
							 
						 
						
							
							
								
								Change the block size on Loongson 3B.  
							
							 
							
							
							
						 
						
							2011-11-23 18:41:49 +00:00  
						
					 
				
					
						
							
							
								 
								Xianyi Zhang
							
						 
						
							 
							
							
							
							
								
							
							
								b95ad4cfaf 
								
							 
						 
						
							
							
								
								Support detecting ICT Loongson-3B CPU.  
							
							 
							
							
							
						 
						
							2011-11-09 19:29:50 +00:00  
						
					 
				
					
						
							
							
								 
								traz
							
						 
						
							 
							
							
							
							
								
							
							
								831858b883 
								
							 
						 
						
							
							
								
								Modify aligned address of sa and sb to improve the performance of multi-threads.  
							
							 
							
							
							
						 
						
							2011-09-23 20:59:48 +00:00  
						
					 
				
					
						
							
							
								 
								traz
							
						 
						
							 
							
							
							
							
								
							
							
								d238a768ab 
								
							 
						 
						
							
							
								
								Use ps instructions in cgemm.  
							
							 
							
							
							
						 
						
							2011-09-14 15:32:25 +00:00  
						
					 
				
					
						
							
							
								 
								Xianyi Zhang
							
						 
						
							 
							
							
							
							
								
							
							
								4727fe8abf 
								
							 
						 
						
							
							
								
								Refs  #47 . On Loongson 3A, set DGEMM_R parameter depending on different number of threads. It would improve double precision BLAS3 on multi-threads.  
							
							 
							
							
							
						 
						
							2011-09-05 15:13:52 +00:00  
						
					 
				
					
						
							
							
								 
								traz
							
						 
						
							 
							
							
							
							
								
							
							
								74a3f63489 
								
							 
						 
						
							
							
								
								Tuning mb, kb, nb size to get the best performance.  
							
							 
							
							
							
						 
						
							2011-09-01 17:15:28 +00:00  
						
					 
				
					
						
							
							
								 
								traz
							
						 
						
							 
							
							
							
							
								
							
							
								cb0214787b 
								
							 
						 
						
							
							
								
								Modify compile options.  
							
							 
							
							
							
						 
						
							2011-08-30 20:57:00 +00:00  
						
					 
				
					
						
							
							
								 
								traz
							
						 
						
							 
							
							
							
							
								
							
							
								c8360e3ae5 
								
							 
						 
						
							
							
								
								Complete all the plura single precision functions of level3 on Loongson3a, the performance is 2.3GFlops.  
							
							 
							
							
							
						 
						
							2011-07-18 17:03:38 +00:00  
						
					 
				
					
						
							
							
								 
								traz
							
						 
						
							 
							
							
							
							
								
							
							
								e72113f06a 
								
							 
						 
						
							
							
								
								Add ztrmm and ztrsm part on loongson3a. The average performance is 2.2G.  
							
							 
							
							
							
						 
						
							2011-06-23 21:11:00 +00:00  
						
					 
				
					
						
							
							
								 
								traz
							
						 
						
							 
							
							
							
							
								
							
							
								1c96d345e2 
								
							 
						 
						
							
							
								
								Improve zgemm performance from 1G to 1.8G, change block size in param.h.  
							
							 
							
							
							
						 
						
							2011-06-21 22:16:23 +00:00  
						
					 
				
					
						
							
							
								 
								traz
							
						 
						
							 
							
							
							
							
								
							
							
								88d94d0ec8 
								
							 
						 
						
							
							
								
								Fixed   #30  strmm computational error on Loongson3A.  
							
							 
							
							
							
						 
						
							2011-05-28 09:48:34 +00:00  
						
					 
				
					
						
							
							
								 
								traz
							
						 
						
							 
							
							
							
							
								
							
							
								ab9e4ce351 
								
							 
						 
						
							
							
								
								Adjust kc size from 112 to 116 .  
							
							 
							
							
							
						 
						
							2011-04-11 22:17:57 +00:00  
						
					 
				
					
						
							
							
								 
								traz
							
						 
						
							 
							
							
							
							
								
							
							
								1aa9a298e1 
								
							 
						 
						
							
							
								
								Change BLOCK SIZE of LOONGSON3A TARGET.  
							
							 
							
							
							
						 
						
							2011-04-06 10:39:31 +00:00  
						
					 
				
					
						
							
							
								 
								Xianyi Zhang
							
						 
						
							 
							
							
							
							
								
							
							
								0597c1076f 
								
							 
						 
						
							
							
								
								Added the configures of loongson 3a. refs  #1  
							
							 
							
							
							
						 
						
							2011-01-24 22:45:35 +00:00  
						
					 
				
					
						
							
							
								 
								Xianyi Zhang
							
						 
						
							 
							
							
							
							
								
							
							
								342bbc3871 
								
							 
						 
						
							
							
								
								Import GotoBLAS2 1.13 BSD version codes.  
							
							 
							
							
							
						 
						
							2011-01-24 14:54:24 +00:00