wernsaar
|
5ae1731fe6
|
better optimizations for sgemv_t kernel
|
2014-09-06 21:28:57 +02:00 |
wernsaar
|
c8eaf3ae2d
|
optimized sgemv_t_4 kernel for very small sizes
|
2014-09-06 19:41:57 +02:00 |
wernsaar
|
3a7ab47ee9
|
optimized sgemv_t
|
2014-09-06 18:34:25 +02:00 |
wernsaar
|
cf5544b417
|
optimization for small size
|
2014-09-06 13:17:56 +02:00 |
wernsaar
|
d143f84dd2
|
added optimized sgemv_n kernel for haswell
|
2014-09-06 12:08:48 +02:00 |
wernsaar
|
a64fe9bcc9
|
added optimized sgemv_n kernel for sandybridge
|
2014-09-06 08:41:53 +02:00 |
wernsaar
|
6df7a88930
|
optimized sgemv_t for sandybridge
|
2014-09-05 10:22:50 +02:00 |
wernsaar
|
53de943690
|
bugfix for sgemv_n_4.c
|
2014-09-04 18:55:52 +02:00 |
wernsaar
|
7f910010a0
|
optimized sgemv_n kernel for small sizes
|
2014-09-04 13:09:27 +02:00 |
wernsaar
|
3a5d8dbff9
|
optimized sgemv_n_4.c
|
2014-09-03 15:34:30 +02:00 |
wernsaar
|
2a60c6d4b0
|
optimized sgemv_n for small sizes
|
2014-09-03 14:48:45 +02:00 |
wernsaar
|
0fc560ba23
|
bugfix for buffer overflow
|
2014-09-03 10:13:47 +02:00 |
wernsaar
|
f3b50dcf5b
|
removed obsolete instructions from sgemv_t_4.c
|
2014-09-02 13:35:41 +02:00 |
wernsaar
|
93eaba959d
|
optimized sgemv_t for bulldozer
|
2014-09-02 12:42:36 +02:00 |
wernsaar
|
9570e56965
|
optimized sgemv_t_4.c for small sizes
|
2014-09-01 15:11:37 +02:00 |
wernsaar
|
bc99faef1b
|
optimized sgemv_t_4.c for uneven sizes
|
2014-08-31 14:33:15 +02:00 |
wernsaar
|
848c0f16f7
|
optimized sgemv_t_4.c for small size
|
2014-08-31 13:23:44 +02:00 |
wernsaar
|
53e6dbf6ca
|
optimized sgemv_t kernel for small sizes
|
2014-08-30 13:36:27 +02:00 |
wernsaar
|
20cd850125
|
modification for clang compiler
|
2014-08-27 09:00:20 +02:00 |
wernsaar
|
3885eebdb8
|
added optimized zaxpy bulldozer kernel
|
2014-08-25 15:52:35 +02:00 |
wernsaar
|
ee74445155
|
added optimized caxpy kernel for bulldozer
|
2014-08-25 14:53:28 +02:00 |
wernsaar
|
9d2ace8bac
|
added optimized daxpy kernel for bulldozer
|
2014-08-24 10:57:12 +02:00 |
wernsaar
|
b55f997302
|
added optimized daxpy kernel for nehalem
|
2014-08-23 17:53:07 +02:00 |
wernsaar
|
e45c960c2c
|
added optimized saxpy kernel for nehalem
|
2014-08-23 17:15:21 +02:00 |
wernsaar
|
ac76b6267f
|
added optimized dgemv_n kernel for nehalem
|
2014-08-23 10:40:57 +02:00 |
wernsaar
|
f1b96c4846
|
added optimized ddot kernel for bulldozer
|
2014-08-22 21:19:29 +02:00 |
wernsaar
|
16d6be852d
|
added optimized ddot kernel for nehalem
|
2014-08-22 20:34:41 +02:00 |
wernsaar
|
95a707ced3
|
update of KERNEL.BULLDOZER
|
2014-08-22 17:01:27 +02:00 |
wernsaar
|
5d97b0754c
|
added optimized sdot kernel for nehalem
|
2014-08-22 17:00:26 +02:00 |
wernsaar
|
8a9e868919
|
added optimized sdot for bulldozer
|
2014-08-22 14:29:17 +02:00 |
wernsaar
|
c8b0645266
|
added optimized symv_L kernels for nehalem
|
2014-08-21 14:27:00 +02:00 |
wernsaar
|
ec05ff3f64
|
added optimized ssymv_L kernel for bulldozer
|
2014-08-21 13:32:06 +02:00 |
wernsaar
|
f6f9122660
|
added optimized dsymv_L kernel for bulldozer
|
2014-08-21 13:02:53 +02:00 |
wernsaar
|
8247f38dc1
|
added optimized dsymv_U kernel for nehalem
|
2014-08-20 09:58:04 +02:00 |
wernsaar
|
ef6374196d
|
updated optimized dsymv_U kernel for bulldozer
|
2014-08-20 09:00:56 +02:00 |
wernsaar
|
f824c2b751
|
updated optimized ssymv_U for bulldozer
|
2014-08-19 19:25:03 +02:00 |
wernsaar
|
4ba4ab623f
|
added optimized ssymv_U kernel for nehalem
|
2014-08-19 17:09:45 +02:00 |
wernsaar
|
4f39447c05
|
added optimized ssymv_U kernel for bulldozer
|
2014-08-18 13:52:24 +02:00 |
wernsaar
|
74c9465672
|
added optimized dsymv_U kernel for bulldozer
|
2014-08-18 12:18:10 +02:00 |
wernsaar
|
101dd08173
|
add reference in C for symv_U
|
2014-08-16 13:52:50 +02:00 |
wernsaar
|
493d4fe7e5
|
added reference in C for symv_L
|
2014-08-16 11:36:48 +02:00 |
wernsaar
|
11eab4c019
|
added optimized cgemv_n for haswell
|
2014-08-14 19:00:30 +02:00 |
wernsaar
|
4568d32b6b
|
added optimized cgemv_t kernel for haswell
|
2014-08-14 14:10:29 +02:00 |
wernsaar
|
c1a6374c6f
|
optimized zgemv_n kernel for sandybridge
|
2014-08-13 16:10:03 +02:00 |
wernsaar
|
2470129132
|
added fast return, if m or n < 1
|
2014-08-13 13:54:19 +02:00 |
wernsaar
|
8c582d362d
|
optimized zgemv_t_microk_haswell-2.c
|
2014-08-13 13:42:22 +02:00 |
wernsaar
|
11e34ddd1b
|
bugfix for zgemv_n_microk_haswell-2.c
|
2014-08-13 12:54:18 +02:00 |
wernsaar
|
9528f0d9ee
|
bugfix in zgemv_n_microk_sandy-2.c
|
2014-08-13 12:18:03 +02:00 |
wernsaar
|
b06550519e
|
added optimized cgemv_t c-kernel
|
2014-08-12 12:15:41 +02:00 |
wernsaar
|
6093ee5363
|
bugfix in zgemv_n_microk_haswell-2.c
|
2014-08-12 10:02:25 +02:00 |
wernsaar
|
07c66b1960
|
modified algorithm for better numerical stability
|
2014-08-12 08:35:42 +02:00 |
wernsaar
|
58b075daef
|
added optimized zgemv_t kernel for haswell
|
2014-08-11 16:57:52 +02:00 |
wernsaar
|
09fcd3a341
|
add optimized zgemv_t kernel for bulldozer
|
2014-08-11 14:19:25 +02:00 |
wernsaar
|
726ad085cb
|
added optimized zgemv_t for haswell
|
2014-08-11 13:10:12 +02:00 |
wernsaar
|
6fe416976d
|
added optimized zgemv_t c-kernel
|
2014-08-11 09:13:18 +02:00 |
wernsaar
|
dbc2eff029
|
disabled optimized haswell zgemv_n kernel for windows ( bad rounding )
|
2014-08-10 11:57:24 +02:00 |
wernsaar
|
462b4885ff
|
added optimized zgemv_n kernel for haswell
|
2014-08-10 08:39:17 +02:00 |
wernsaar
|
aa54fe064c
|
added zgemv_n c-function
|
2014-08-07 22:30:20 +02:00 |
wernsaar
|
006ef3ea01
|
added optimized dgemv_t kernel for haswell
|
2014-08-07 10:08:54 +02:00 |
wernsaar
|
60f17628cc
|
added optimized dgemv_n kernel for haswell
|
2014-08-07 09:18:02 +02:00 |
wernsaar
|
c9bad1403a
|
added optimized sgemv_t kernel for sandybridge
|
2014-08-07 07:49:33 +02:00 |
wernsaar
|
2f8927376f
|
enabled optimized nehalem sgemv_t kernel for windows
|
2014-08-06 16:58:21 +02:00 |
wernsaar
|
d945a2b06d
|
added optimized sgemv_t kernel for nehalem
|
2014-08-06 16:21:48 +02:00 |
wernsaar
|
ca6c8d06ce
|
enabled optimized sgemv kernels for windows
|
2014-08-06 14:24:36 +02:00 |
wernsaar
|
7aa43c8928
|
enabled optimized sgemv kernels for windows
|
2014-08-06 14:06:30 +02:00 |
wernsaar
|
891b960854
|
added optimized sgemv_t kernel for haswell
|
2014-08-06 13:42:41 +02:00 |
wernsaar
|
95a8caa2f3
|
added optimized sgemv_t kernel
|
2014-08-06 12:12:17 +02:00 |
wernsaar
|
8c05b8105b
|
bugfix in sgemv_n.c
|
2014-08-05 20:14:29 +02:00 |
wernsaar
|
c80084a98f
|
changed default x86_64 sgemv_n kernel to sgemv_n.c
|
2014-08-05 19:42:56 +02:00 |
wernsaar
|
2bab92961f
|
enabled optimized sgemv_n kernels for windows
|
2014-08-05 14:52:54 +02:00 |
wernsaar
|
9175b8bd5f
|
changed long to blaslong for windows compatibility
|
2014-08-05 13:28:39 +02:00 |
wernsaar
|
793f2d43b0
|
added optimized sgemv_n kernel for nehalem
|
2014-08-05 10:50:08 +02:00 |
wernsaar
|
a4dde45f87
|
optimized sgemv_n kernel for sandybridge
|
2014-08-05 08:53:09 +02:00 |
wernsaar
|
7fa7ea3e1e
|
updated haswell optimized sgemv_n kernel
|
2014-08-05 08:04:47 +02:00 |
wernsaar
|
3fbc13eb65
|
modified sgemv_n for haswell
|
2014-08-04 16:22:11 +02:00 |
wernsaar
|
db6917303f
|
added a better optimized sgemv_n kernel for bulldozer and piledriver
|
2014-08-04 14:29:01 +02:00 |
wernsaar
|
5087096711
|
optimization of sandybridge cgemm-kernel
|
2014-07-29 19:07:21 +02:00 |
wernsaar
|
46bc4fd50c
|
optimized cgemm kernel for haswell
|
2014-07-29 08:53:09 +02:00 |
wernsaar
|
1cc02b4337
|
optimized sgemm kernel for haswell
|
2014-07-28 11:50:01 +02:00 |
wernsaar
|
1d33547222
|
optimized zgemm kernel for haswell
|
2014-07-27 11:51:42 +02:00 |
wernsaar
|
125610d23b
|
allow to set custom value for ?GEMM_DEFAULT_UNROLL_MN, optimizations for syrk
|
2014-07-24 18:43:31 +02:00 |
wernsaar
|
6acbafe45b
|
added sgemv_n microkernel for haswell
|
2014-07-20 14:52:25 +02:00 |
wernsaar
|
5392d11b04
|
optimized sgemv_n_microk_sandy.c
|
2014-07-20 14:08:04 +02:00 |
wernsaar
|
c0fe95fb72
|
added sgemv_n microkernel for sandybridge
|
2014-07-20 13:17:47 +02:00 |
wernsaar
|
d9d4077c93
|
added sgemv_t microkernel for haswell
|
2014-07-20 11:30:32 +02:00 |
wernsaar
|
02eb72ac42
|
bugfix in sgemv_t_microk_sandy.c
|
2014-07-20 10:48:41 +02:00 |
wernsaar
|
c06f9986d4
|
added sgemv_t microkernel for sandybridge
|
2014-07-20 10:21:08 +02:00 |
wernsaar
|
2cce125c79
|
added optimized sgemv_t for bulldozer and piledriver
|
2014-07-19 15:48:07 +02:00 |
wernsaar
|
b3938fe371
|
don't use this sgemv_n on Windows
|
2014-07-19 07:15:34 +02:00 |
wernsaar
|
c8a4a56177
|
performance optimizations for sgemv_n
|
2014-07-18 11:25:21 +02:00 |
wernsaar
|
3c5732615d
|
added blocked sgemv_n and microkernel for bulldozer and piledriver
|
2014-07-17 23:15:07 +02:00 |
wernsaar
|
880597b301
|
segment violation in sgemv kernels
|
2014-07-13 10:46:14 +02:00 |
wernsaar
|
0884b73c69
|
Lapack-test Windows 32bit now error free
|
2014-07-10 11:01:47 +02:00 |
wernsaar
|
9bd9472ae9
|
Lapack-test: cleanup of x86 32bit KERNEL file
|
2014-07-09 16:08:19 +02:00 |
wernsaar
|
c4a423a642
|
bugfixes for lapack on ARM Platform
|
2014-07-09 12:21:39 +02:00 |
wernsaar
|
13348b2137
|
removed reference to daxpy_bulldozer kernel (Windows bug in lapack-test)
|
2014-07-06 16:39:32 +02:00 |
wernsaar
|
9964ed2f79
|
bugfix for CORE2
|
2014-07-06 11:47:28 +02:00 |
wernsaar
|
d5b976f92d
|
fallback to zgemm_kernel_4x2_sse.S
|
2014-07-06 11:05:28 +02:00 |
wernsaar
|
f7267d9b0e
|
added missing definition for DUNNINGTON
|
2014-07-06 10:17:07 +02:00 |
wernsaar
|
e0c080a28c
|
removed reference to zgemm_kernel_4x2_sse3.S (bug in lapack-test)
|
2014-07-05 16:13:17 +02:00 |