Merge pull request #897 from ksraste/develop

STRSM optimized for MSA
This commit is contained in:
Zhang Xianyi 2016-06-27 10:04:18 -04:00 committed by GitHub
commit 4a30a2584a
6 changed files with 1447 additions and 3049 deletions

View File

@ -12,10 +12,6 @@ ifeq ($(ARCH), ia64)
USE_GEMM3M = 1
endif
# When ARCH is MIPS, switch on the USE_GEMM3M knob.
ifeq "$(ARCH)" "MIPS"
USE_GEMM3M = 1
endif

# When ARCH is arm, switch on the USE_TRMM knob.
ifeq "$(ARCH)" "arm"
USE_TRMM = 1
endif

View File

@ -42,6 +42,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* Description : Store one value of type RTYPE at a destination address
   Arguments   : Inputs - RTYPE (type used for the store), in (value to store),
                          pdst (destination pointer)
   Details     : 'pdst' is cast to 'RTYPE *' and 'in' is written through it
*/
#define ST_D(RTYPE, in, pdst)  ((RTYPE *)(pdst))[0] = (in)
/* ST_D with RTYPE fixed to v2f64 */
#define ST_DP(...)  ST_D(v2f64, __VA_ARGS__)
/* Description : Broadcast a scalar float into a single precision vector
   Arguments   : Inputs  - a (scalar float)
                 Outputs - b (vector lvalue; it is cast to v4i32 and assigned
                              a v4f32 result)
   Details     : 'a' is first converted with __msa_cast_to_vector_float, then
                 word element 0 of the result is splatted with __msa_splati_w.
                 The two statements are wrapped in a brace block so the macro
                 expands to a single statement and is fully guarded when used
                 as the body of an unbraced if/for/while.
*/
#define COPY_FLOAT_TO_VECTOR(a, b)                     \
{                                                      \
    (b) = __msa_cast_to_vector_float(a);               \
    (b) = (v4f32) __msa_splati_w((v4i32) (b), 0);      \
}
/* Description : Load 2 vectors of single precision floating point elements with stride
Arguments : Inputs - psrc, stride
Outputs - out0, out1
@ -178,7 +183,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
out2 = (RTYPE) __msa_ilvr_d((v2i64) s3_m, (v2i64) s1_m); \
out3 = (RTYPE) __msa_ilvl_d((v2i64) s3_m, (v2i64) s1_m); \
}
/* TRANSPOSE4x4_W with v4f32 passed as its first (type) argument; the
   remaining arguments are forwarded unchanged */
#define TRANSPOSE4x4_SP_SP(...) TRANSPOSE4x4_W(v4f32, __VA_ARGS__)
#endif /* __MACROS_MSA_H__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff