Merge pull request #897 from ksraste/develop

STRSM optimized for MSA
This commit is contained in:
Zhang Xianyi 2016-06-27 10:04:18 -04:00 committed by GitHub
commit 4a30a2584a
6 changed files with 1447 additions and 3049 deletions

View File

@ -12,10 +12,6 @@ ifeq ($(ARCH), ia64)
USE_GEMM3M = 1 USE_GEMM3M = 1
endif endif
ifeq ($(ARCH), MIPS)
USE_GEMM3M = 1
endif
ifeq ($(ARCH), arm) ifeq ($(ARCH), arm)
USE_TRMM = 1 USE_TRMM = 1
endif endif

View File

@ -42,6 +42,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ST_D(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in) #define ST_D(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
#define ST_DP(...) ST_D(v2f64, __VA_ARGS__) #define ST_DP(...) ST_D(v2f64, __VA_ARGS__)
/* Description : Fill a single precision vector from one scalar value
   Arguments   : Inputs  - a (scalar expression, evaluated once)
                 Outputs - b (lvalue that receives the v4f32 result)
   Details     : 'a' is handed to __msa_cast_to_vector_float and element 0 of
                 that vector is then replicated with __msa_splati_w.
                 The expansion is ONE assignment statement, so the macro is not
                 split apart when it is the body of an unbraced 'if' or loop.
                 The trailing ';' is kept on purpose so that existing call
                 sites, written with or without their own ';', still compile.
*/
#define COPY_FLOAT_TO_VECTOR(a, b)                                          \
    b = (v4f32) __msa_splati_w((v4i32) __msa_cast_to_vector_float(a), 0);
/* Description : Load 2 vectors of single precision floating point elements with stride /* Description : Load 2 vectors of single precision floating point elements with stride
Arguments : Inputs - psrc, stride Arguments : Inputs - psrc, stride
Outputs - out0, out1 Outputs - out0, out1
@ -178,7 +183,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
out2 = (RTYPE) __msa_ilvr_d((v2i64) s3_m, (v2i64) s1_m); \ out2 = (RTYPE) __msa_ilvr_d((v2i64) s3_m, (v2i64) s1_m); \
out3 = (RTYPE) __msa_ilvl_d((v2i64) s3_m, (v2i64) s1_m); \ out3 = (RTYPE) __msa_ilvl_d((v2i64) s3_m, (v2i64) s1_m); \
} }
#define TRANSPOSE4x4_SP_SP(...) TRANSPOSE4x4_W(v4f32, __VA_ARGS__) #define TRANSPOSE4x4_SP_SP(...) TRANSPOSE4x4_W(v4f32, __VA_ARGS__)
#endif /* __MACROS_MSA_H__ */ #endif /* __MACROS_MSA_H__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff