Merge pull request #421 from wernsaar/develop

optimized sgemm- and cgemm-kernel for haswell
This commit is contained in:
Zhang Xianyi 2014-07-29 15:50:00 +08:00
commit 21f7768b26
3 changed files with 8607 additions and 2315 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1237,10 +1237,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CGEMM_DEFAULT_P 384
 #define ZGEMM_DEFAULT_P 256
-#define SGEMM_DEFAULT_Q 384
 #ifdef WINDOWS_ABI
+#define SGEMM_DEFAULT_Q 320
 #define DGEMM_DEFAULT_Q 128
 #else
+#define SGEMM_DEFAULT_Q 384
 #define DGEMM_DEFAULT_Q 256
 #endif
 #define CGEMM_DEFAULT_Q 192