diff --git a/kernel/power/sgemm_kernel_16x8_power8.S b/kernel/power/sgemm_kernel_16x8_power8.S
index 9f221301a..031f342ad 100644
--- a/kernel/power/sgemm_kernel_16x8_power8.S
+++ b/kernel/power/sgemm_kernel_16x8_power8.S
@@ -26,10 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/
 
 /**************************************************************************************
-* 2016/03/14 Werner Saar (wernsaar@googlemail.com)
+* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
 * 	 BLASTEST		: OK
 * 	 CTEST			: OK
 * 	 TEST			: OK
+*	 LAPACK-TEST		: OK
 **************************************************************************************/
 
 /*********************************************************************/
@@ -81,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 
 #ifdef __64BIT__
-#define STACKSIZE 320
+#define STACKSIZE 340
 #define ALPHA_SP   296(SP)
 #define FZERO	304(SP)
 #else
@@ -127,10 +128,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 
 #define alpha_r vs30
-#define alpha_vr vs31
 
 #define o0	0
 
+#define TBUFFER	r14
 #define o4	r15
 #define o12	r16
 #define o8	r17
@@ -202,6 +203,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	std	r17,  256(SP)
 	std	r16,  264(SP)
 	std	r15,  272(SP)
+	std	r14,  280(SP)
 #else
 	stw	r31,  144(SP)
 	stw	r30,  148(SP)
@@ -220,6 +222,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	stw	r17,  200(SP)
 	stw	r16,  204(SP)
 	stw	r15,  208(SP)
+	stw	r14,  212(SP)
 #endif
 
 //	stfd	f1,  ALPHA_SP
@@ -259,24 +262,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	cmpwi	cr0, K, 0
 	ble	.L999_H1
 
-	li	PRE, 384
+	li	PRE, 256
 	li	o4 , 4
 	li	o8 , 8
 	li	o12, 12
 	li	o16, 16
 	li	o32, 32
 	li	o48, 48
+	addi	TBUFFER, SP, 320
 
 	addi	T1, SP, 300
 	stfs	f1, 0(T1)
 
-	stfs	f1, 4(T1)
-	stfs	f1, 8(T1)
-	stfs	f1,12(T1)
-	lxsspx	vs28, 0, T1
-
-	xxspltw	alpha_r, vs28 , 0
-	lxvw4x	alpha_vr, 0, T1
+	lxsspx	alpha_r, 0, T1
 
@@ -326,6 +324,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	ld	r17,  256(SP)
 	ld	r16,  264(SP)
 	ld	r15,  272(SP)
+	ld	r14,  280(SP)
 #else
 	lwz	r31,  144(SP)
 	lwz	r30,  148(SP)
@@ -344,6 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	lwz	r17,  200(SP)
 	lwz	r16,  204(SP)
 	lwz	r15,  208(SP)
+	lwz	r14,  212(SP)
 #endif
 
 	addi	SP, SP, STACKSIZE
diff --git a/kernel/power/sgemm_logic_16x8_power8.S b/kernel/power/sgemm_logic_16x8_power8.S
index 6c5a1c7ef..0ae6413ce 100644
--- a/kernel/power/sgemm_logic_16x8_power8.S
+++ b/kernel/power/sgemm_logic_16x8_power8.S
@@ -26,13 +26,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/
 
 /**************************************************************************************
-* 2016/03/14 Werner Saar (wernsaar@googlemail.com)
+* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
 * 	 BLASTEST		: OK
 * 	 CTEST			: OK
 * 	 TEST			: OK
+*	 LAPACK-TEST		: OK
 **************************************************************************************/
 
-
 	srawi.		J,	N,	3
 	ble		.LSGEMM_L8_END
 
diff --git a/kernel/power/sgemm_macros_16x8_power8.S b/kernel/power/sgemm_macros_16x8_power8.S
index 78f530cfa..a2d36c089 100644
--- a/kernel/power/sgemm_macros_16x8_power8.S
+++ b/kernel/power/sgemm_macros_16x8_power8.S
@@ -26,10 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
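[Review note] The prologue changes above form one coherent edit: alpha is now kept only as a scalar in vs30 (alpha_vr/vs31 is dropped, so the four stfs stores plus xxspltw splat collapse into a single stfs and lxsspx), r14 is repurposed as TBUFFER and pointed at a 16-byte scratch slot at SP+320, STACKSIZE grows from 320 to 340 on 64-bit to cover that slot and the extra r14 spill/reload, and the prefetch distance PRE drops from 384 to 256. The macros diff that follows uses TBUFFER to apply alpha with scalar arithmetic in place of the old vector multiplies.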
 *****************************************************************************/
 
 /**************************************************************************************
-* 2016/03/14 Werner Saar (wernsaar@googlemail.com)
+* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
 * 	 BLASTEST		: OK
 * 	 CTEST			: OK
 * 	 TEST			: OK
+*	 LAPACK-TEST		: OK
 **************************************************************************************/
 
 /**********************************************************************************************
@@ -38,49 +39,65 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 .macro LOAD8x16_1
 
-	lxvw4x	vs28,	o0,	BO
-	lxvw4x	vs29,	o16,	BO
-
 	lxvw4x	vs0,	o0,	AO
 	lxvw4x	vs1,	o16,	AO
 	lxvw4x	vs2,	o32,	AO
 	lxvw4x	vs3,	o48,	AO
 
+	addi	AO,	AO,	64
+
+	lxvw4x	vs28,	o0,	BO
+
 	xxspltw	vs8,	vs28,	0
 	xxspltw	vs9,	vs28,	1
 	xxspltw	vs10,	vs28,	2
 	xxspltw	vs11,	vs28,	3
 
+	lxvw4x	vs29,	o16,	BO
+
 	xxspltw	vs12,	vs29,	0
 	xxspltw	vs13,	vs29,	1
 
-	addi	AO,	AO,	64
-	addi	BO,	BO,	32
 
 	xxspltw	vs14,	vs29,	2
 	xxspltw	vs15,	vs29,	3
 
+	addi	BO,	BO,	32
 
 .endm
 
 .macro KERNEL8x16_I1
 
-	xvmulsp	vs32,	vs0,	vs8
-	xvmulsp	vs33,	vs1,	vs8
-
 	lxvw4x	vs4,	o0,	AO
 	lxvw4x	vs5,	o16,	AO
 
-	xvmulsp	vs34,	vs2,	vs8
-	xvmulsp	vs35,	vs3,	vs8
-
-	lxvw4x	vs28,	o0,	BO
-	lxvw4x	vs29,	o16,	BO
-
-	xvmulsp	vs36,	vs0,	vs9
-	xvmulsp	vs37,	vs1,	vs9
-
 	lxvw4x	vs6,	o32,	AO
 	lxvw4x	vs7,	o48,	AO
 
+	addi	AO,	AO,	64
+
+	lxvw4x	vs28,	o0,	BO
+
+	xxspltw	vs16,	vs28,	0
+	xxspltw	vs17,	vs28,	1
+	xxspltw	vs18,	vs28,	2
+	xxspltw	vs19,	vs28,	3
+
+	lxvw4x	vs29,	o16,	BO
+
+	xxspltw	vs20,	vs29,	0
+	xxspltw	vs21,	vs29,	1
+	xxspltw	vs22,	vs29,	2
+	xxspltw	vs23,	vs29,	3
+
+	addi	BO,	BO,	32
+
+
+	xvmulsp	vs32,	vs0,	vs8
+	xvmulsp	vs33,	vs1,	vs8
+	xvmulsp	vs34,	vs2,	vs8
+	xvmulsp	vs35,	vs3,	vs8
+
+	xvmulsp	vs36,	vs0,	vs9
+	xvmulsp	vs37,	vs1,	vs9
 	xvmulsp	vs38,	vs2,	vs9
 	xvmulsp	vs39,	vs3,	vs9
 
@@ -104,27 +121,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	xvmulsp	vs54,	vs2,	vs13
 	xvmulsp	vs55,	vs3,	vs13
 
-	xxspltw	vs16,	vs28,	0
-	xxspltw	vs17,	vs28,	1
-	xxspltw	vs18,	vs28,	2
-	xxspltw	vs19,	vs28,	3
-
 	xvmulsp	vs56,	vs0,	vs14
 	xvmulsp	vs57,	vs1,	vs14
 	xvmulsp	vs58,	vs2,	vs14
 	xvmulsp	vs59,	vs3,	vs14
 
-	xxspltw	vs20,	vs29,	0
-	xxspltw	vs21,	vs29,	1
-	xxspltw	vs22,	vs29,	2
-	xxspltw	vs23,	vs29,	3
-
 	xvmulsp	vs60,	vs0,	vs15
 	xvmulsp	vs61,	vs1,	vs15
-
-	addi	AO,	AO,	64
-	addi	BO,	BO,	32
-
 	xvmulsp	vs62,	vs2,	vs15
 	xvmulsp	vs63,	vs3,	vs15
 
@@ -135,36 +138,40 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	xvmaddasp	vs32,	vs0,	vs8
 	xvmaddasp	vs33,	vs1,	vs8
 
-
+	lxvw4x	vs28,	o0,	BO
 	lxvw4x	vs4,	o0,	AO
-	lxvw4x	vs5,	o16,	AO
-
 	xvmaddasp	vs34,	vs2,	vs8
 	xvmaddasp	vs35,	vs3,	vs8
 
 	xvmaddasp	vs36,	vs0,	vs9
 	xvmaddasp	vs37,	vs1,	vs9
 
-	lxvw4x	vs28,	o0,	BO
+	lxvw4x	vs29,	o16,	BO
+	lxvw4x	vs5,	o16,	AO
 	xvmaddasp	vs38,	vs2,	vs9
 	xvmaddasp	vs39,	vs3,	vs9
 
 	xvmaddasp	vs40,	vs0,	vs10
 	xvmaddasp	vs41,	vs1,	vs10
 
-
 	lxvw4x	vs6,	o32,	AO
 	lxvw4x	vs7,	o48,	AO
-
 	xvmaddasp	vs42,	vs2,	vs10
 	xvmaddasp	vs43,	vs3,	vs10
 
+	xxspltw	vs16,	vs28,	0
+	xxspltw	vs17,	vs28,	1
+	xxspltw	vs18,	vs28,	2
+	xxspltw	vs19,	vs28,	3
+
 	xvmaddasp	vs44,	vs0,	vs11
 	xvmaddasp	vs45,	vs1,	vs11
-
-	lxvw4x	vs29,	o16,	BO
-
 	xvmaddasp	vs46,	vs2,	vs11
 	xvmaddasp	vs47,	vs3,	vs11
 
+	xxspltw	vs20,	vs29,	0
+	xxspltw	vs21,	vs29,	1
+	xxspltw	vs22,	vs29,	2
+	xxspltw	vs23,	vs29,	3
+
 	xvmaddasp	vs48,	vs0,	vs12
 	xvmaddasp	vs49,	vs1,	vs12
 	xvmaddasp	vs50,	vs2,	vs12
@@ -172,36 +179,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	xvmaddasp	vs52,	vs0,	vs13
 	xvmaddasp	vs53,	vs1,	vs13
-
-	xxspltw	vs16,	vs28,	0
-	xxspltw	vs17,	vs28,	1
-
 	xvmaddasp	vs54,	vs2,	vs13
 	xvmaddasp	vs55,	vs3,	vs13
 
 	xvmaddasp	vs56,	vs0,	vs14
 	xvmaddasp	vs57,	vs1,	vs14
-
-	xxspltw	vs18,	vs28,	2
-	xxspltw	vs19,	vs28,	3
-
+	addi	AO,	AO,	64
+	addi	BO,	BO,	32
 	xvmaddasp	vs58,	vs2,	vs14
 	xvmaddasp	vs59,	vs3,	vs14
 
-	xxspltw	vs20,	vs29,	0
-	xxspltw	vs21,	vs29,	1
-
 	xvmaddasp	vs60,	vs0,	vs15
 	xvmaddasp	vs61,	vs1,	vs15
-
-	addi	AO,	AO,	64
-	addi	BO,	BO,	32
-
 	xvmaddasp	vs62,	vs2,	vs15
 	xvmaddasp	vs63,	vs3,	vs15
 
-	xxspltw	vs22,	vs29,	2
-	xxspltw	vs23,	vs29,	3
 
 .endm
 
@@ -210,8 +202,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	xvmaddasp	vs32,	vs4,	vs16
 	xvmaddasp	vs33,	vs5,	vs16
 
+	lxvw4x	vs28,	o0,	BO
 	lxvw4x	vs0,	o0,	AO
-	lxvw4x	vs1,	o16,	AO
 
 	xvmaddasp	vs34,	vs6,	vs16
 	xvmaddasp	vs35,	vs7,	vs16
@@ -219,28 +211,35 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	xvmaddasp	vs36,	vs4,	vs17
 	xvmaddasp	vs37,	vs5,	vs17
 
-	lxvw4x	vs28,	o0,	BO
+	lxvw4x	vs29,	o16,	BO
+	lxvw4x	vs1,	o16,	AO
 	xvmaddasp	vs38,	vs6,	vs17
 	xvmaddasp	vs39,	vs7,	vs17
 
-	xvmaddasp	vs40,	vs4,	vs18
-	xvmaddasp	vs41,	vs5,	vs18
-
 	lxvw4x	vs2,	o32,	AO
 	lxvw4x	vs3,	o48,	AO
 
+	xvmaddasp	vs40,	vs4,	vs18
+	xvmaddasp	vs41,	vs5,	vs18
 	xvmaddasp	vs42,	vs6,	vs18
 	xvmaddasp	vs43,	vs7,	vs18
 
+	xxspltw	vs8,	vs28,	0
+	xxspltw	vs9,	vs28,	1
+	xxspltw	vs10,	vs28,	2
+	xxspltw	vs11,	vs28,	3
+
 	xvmaddasp	vs44,	vs4,	vs19
 	xvmaddasp	vs45,	vs5,	vs19
-
-	lxvw4x	vs29,	o16,	BO
-
 	xvmaddasp	vs46,	vs6,	vs19
 	xvmaddasp	vs47,	vs7,	vs19
 
+	xxspltw	vs12,	vs29,	0
+	xxspltw	vs13,	vs29,	1
+	xxspltw	vs14,	vs29,	2
+	xxspltw	vs15,	vs29,	3
+
 	xvmaddasp	vs48,	vs4,	vs20
 	xvmaddasp	vs49,	vs5,	vs20
 	xvmaddasp	vs50,	vs6,	vs20
@@ -248,32 +247,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	xvmaddasp	vs52,	vs4,	vs21
 	xvmaddasp	vs53,	vs5,	vs21
-
-	xxspltw	vs8,	vs28,	0
-	xxspltw	vs9,	vs28,	1
-	xxspltw	vs10,	vs28,	2
-	xxspltw	vs11,	vs28,	3
-
 	xvmaddasp	vs54,	vs6,	vs21
 	xvmaddasp	vs55,	vs7,	vs21
 
 	xvmaddasp	vs56,	vs4,	vs22
 	xvmaddasp	vs57,	vs5,	vs22
-
-	xxspltw	vs12,	vs29,	0
-	xxspltw	vs13,	vs29,	1
-	xxspltw	vs14,	vs29,	2
-	xxspltw	vs15,	vs29,	3
-
 	xvmaddasp	vs58,	vs6,	vs22
 	xvmaddasp	vs59,	vs7,	vs22
 
 	xvmaddasp	vs60,	vs4,	vs23
 	xvmaddasp	vs61,	vs5,	vs23
-
 	addi	AO,	AO,	64
 	addi	BO,	BO,	32
-
 	xvmaddasp	vs62,	vs6,	vs23
 	xvmaddasp	vs63,	vs7,	vs23
 
@@ -479,22 +464,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
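[Review note] The LOAD8x16_1 / KERNEL8x16_* rewrites above are pure instruction scheduling: the lxvw4x loads of A and B are hoisted ahead of the xxspltw splats that consume them, the AO/BO pointer increments move next to the last loads, and the multiply / FMA stream is left contiguous. Nothing numeric changes in these macros; the arithmetic change is confined to the SAVE macros that follow.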
 #endif
 
+
+	stxvw4x	vs32,	o0,	TBUFFER
+
+	lxsspx	vs4,	o0,	TBUFFER
+	lxsspx	vs5,	o4,	TBUFFER
+	lxsspx	vs6,	o8,	TBUFFER
+	lxsspx	vs7,	o12,	TBUFFER
+
+	xsmulsp	vs4,	vs4,	alpha_r
+	xsmulsp	vs5,	vs5,	alpha_r
+	xsmulsp	vs6,	vs6,	alpha_r
+	xsmulsp	vs7,	vs7,	alpha_r
+
+	stxsspx	vs4,	o0,	TBUFFER
+	stxsspx	vs5,	o4,	TBUFFER
+	stxsspx	vs6,	o8,	TBUFFER
+	stxsspx	vs7,	o12,	TBUFFER
+
 #ifdef TRMMKERNEL
-
-	xvmulsp	vs0,	vs32,	alpha_vr
-	xvmulsp	vs1,	vs33,	alpha_vr
-	xvmulsp	vs2,	vs34,	alpha_vr
-	xvmulsp	vs3,	vs35,	alpha_vr
-
+	lxvw4x	vs0,	o0,	TBUFFER
 #else
-
-	xvmaddasp	vs0,	vs32,	alpha_vr
-	xvmaddasp	vs1,	vs33,	alpha_vr
-	xvmaddasp	vs2,	vs34,	alpha_vr
-	xvmaddasp	vs3,	vs35,	alpha_vr
-
+	lxvw4x	vs28,	o0,	TBUFFER
+	xvaddsp	vs0,	vs0,	vs28
 #endif
 
+	stxvw4x	vs33,	o0,	TBUFFER
+
+	lxsspx	vs4,	o0,	TBUFFER
+	lxsspx	vs5,	o4,	TBUFFER
+	lxsspx	vs6,	o8,	TBUFFER
+	lxsspx	vs7,	o12,	TBUFFER
+
+	xsmulsp	vs4,	vs4,	alpha_r
+	xsmulsp	vs5,	vs5,	alpha_r
+	xsmulsp	vs6,	vs6,	alpha_r
+	xsmulsp	vs7,	vs7,	alpha_r
+
+	stxsspx	vs4,	o0,	TBUFFER
+	stxsspx	vs5,	o4,	TBUFFER
+	stxsspx	vs6,	o8,	TBUFFER
+	stxsspx	vs7,	o12,	TBUFFER
+
+#ifdef TRMMKERNEL
+	lxvw4x	vs1,	o0,	TBUFFER
+#else
+	lxvw4x	vs28,	o0,	TBUFFER
+	xvaddsp	vs1,	vs1,	vs28
+#endif
+
+	stxvw4x	vs34,	o0,	TBUFFER
+
+	lxsspx	vs4,	o0,	TBUFFER
+	lxsspx	vs5,	o4,	TBUFFER
+	lxsspx	vs6,	o8,	TBUFFER
+	lxsspx	vs7,	o12,	TBUFFER
+
+	xsmulsp	vs4,	vs4,	alpha_r
+	xsmulsp	vs5,	vs5,	alpha_r
+	xsmulsp	vs6,	vs6,	alpha_r
+	xsmulsp	vs7,	vs7,	alpha_r
+
+	stxsspx	vs4,	o0,	TBUFFER
+	stxsspx	vs5,	o4,	TBUFFER
+	stxsspx	vs6,	o8,	TBUFFER
+	stxsspx	vs7,	o12,	TBUFFER
+
+#ifdef TRMMKERNEL
+	lxvw4x	vs2,	o0,	TBUFFER
+#else
+	lxvw4x	vs28,	o0,	TBUFFER
+	xvaddsp	vs2,	vs2,	vs28
+#endif
+
+	stxvw4x	vs35,	o0,	TBUFFER
+
+	lxsspx	vs4,	o0,	TBUFFER
+	lxsspx	vs5,	o4,	TBUFFER
+	lxsspx	vs6,	o8,	TBUFFER
+	lxsspx	vs7,	o12,	TBUFFER
+
+	xsmulsp	vs4,	vs4,	alpha_r
+	xsmulsp	vs5,	vs5,	alpha_r
+	xsmulsp	vs6,	vs6,	alpha_r
+	xsmulsp	vs7,	vs7,	alpha_r
+
+	stxsspx	vs4,	o0,	TBUFFER
+	stxsspx	vs5,	o4,	TBUFFER
+	stxsspx	vs6,	o8,	TBUFFER
+	stxsspx	vs7,	o12,	TBUFFER
+
+#ifdef TRMMKERNEL
+	lxvw4x	vs3,	o0,	TBUFFER
+#else
+	lxvw4x	vs28,	o0,	TBUFFER
+	xvaddsp	vs3,	vs3,	vs28
+#endif
+
+
+
+
 	stxvw4x	vs0,	o0,	T1
 	stxvw4x	vs1,	o16,	T1
 	stxvw4x	vs2,	o32,	T1
@@ -512,22 +581,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
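[Review note] The block above is the new SAVE path, and the same shape repeats below for every accumulator: each 4-float accumulator vector (vs32, vs33, ...) is spilled to TBUFFER, scaled by alpha one lane at a time with scalar xsmulsp, stored back, reloaded as a vector, and then either written out directly (TRMMKERNEL, C = alpha*AB) or added to C with xvaddsp (C += alpha*AB). A minimal C sketch of one such group, with illustrative names (scale_group, tbuf, trmm are not symbols from the patch):

    /* Hedged sketch of one 4-float group in the new SAVE path. */
    #include <string.h>

    static void scale_group(float *c, const float acc[4], float alpha,
                            float tbuf[4], int trmm)
    {
        memcpy(tbuf, acc, 4 * sizeof(float));    /* stxvw4x vsNN -> TBUFFER */
        for (int i = 0; i < 4; i++)
            tbuf[i] *= alpha;                    /* lxsspx/xsmulsp/stxsspx  */

        if (trmm)
            memcpy(c, tbuf, 4 * sizeof(float));  /* TRMMKERNEL: C = alpha*AB */
        else
            for (int i = 0; i < 4; i++)
                c[i] += tbuf[i];                 /* lxvw4x + xvaddsp */
    }

Compared with the old single xvmulsp/xvmaddasp per vector this costs two extra memory round trips per group, presumably accepted so the vector path rounds exactly like the scalar tails, which would explain the new LAPACK-TEST : OK entries in the headers.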
#endif + + stxvw4x vs36, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs36, alpha_vr - xvmulsp vs1, vs37, alpha_vr - xvmulsp vs2, vs38, alpha_vr - xvmulsp vs3, vs39, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs36, alpha_vr - xvmaddasp vs1, vs37, alpha_vr - xvmaddasp vs2, vs38, alpha_vr - xvmaddasp vs3, vs39, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs37, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs38, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs39, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -545,22 +698,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs40, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs40, alpha_vr - xvmulsp vs1, vs41, alpha_vr - xvmulsp vs2, vs42, alpha_vr - xvmulsp vs3, vs43, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs40, alpha_vr - xvmaddasp vs1, vs41, alpha_vr - xvmaddasp vs2, vs42, alpha_vr - xvmaddasp vs3, vs43, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs41, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs42, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs43, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -578,22 +815,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs44, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs44, alpha_vr - xvmulsp vs1, vs45, alpha_vr - xvmulsp vs2, vs46, alpha_vr - xvmulsp vs3, vs47, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs44, alpha_vr - xvmaddasp vs1, vs45, alpha_vr - xvmaddasp vs2, vs46, alpha_vr - xvmaddasp vs3, vs47, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs45, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs46, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs47, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -611,22 +932,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs48, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs48, alpha_vr - xvmulsp vs1, vs49, alpha_vr - xvmulsp vs2, vs50, alpha_vr - xvmulsp vs3, vs51, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs48, alpha_vr - xvmaddasp vs1, vs49, alpha_vr - xvmaddasp vs2, vs50, alpha_vr - xvmaddasp vs3, vs51, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs49, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs50, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs51, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -644,22 +1049,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs52, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs52, alpha_vr - xvmulsp vs1, vs53, alpha_vr - xvmulsp vs2, vs54, alpha_vr - xvmulsp vs3, vs55, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs52, alpha_vr - xvmaddasp vs1, vs53, alpha_vr - xvmaddasp vs2, vs54, alpha_vr - xvmaddasp vs3, vs55, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs53, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs54, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs55, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -677,22 +1166,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs56, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs56, alpha_vr - xvmulsp vs1, vs57, alpha_vr - xvmulsp vs2, vs58, alpha_vr - xvmulsp vs3, vs59, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs56, alpha_vr - xvmaddasp vs1, vs57, alpha_vr - xvmaddasp vs2, vs58, alpha_vr - xvmaddasp vs3, vs59, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs57, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs58, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs59, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -710,22 +1283,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs60, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs60, alpha_vr - xvmulsp vs1, vs61, alpha_vr - xvmulsp vs2, vs62, alpha_vr - xvmulsp vs3, vs63, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs60, alpha_vr - xvmaddasp vs1, vs61, alpha_vr - xvmaddasp vs2, vs62, alpha_vr - xvmaddasp vs3, vs63, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs61, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs62, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs63, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -1068,18 +1725,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs32, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs32, alpha_vr - xvmulsp vs1, vs33, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs32, alpha_vr - xvmaddasp vs1, vs33, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs33, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -1093,18 +1790,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs34, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs34, alpha_vr - xvmulsp vs1, vs35, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs34, alpha_vr - xvmaddasp vs1, vs35, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs35, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -1118,18 +1855,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs36, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs36, alpha_vr - xvmulsp vs1, vs37, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs36, alpha_vr - xvmaddasp vs1, vs37, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs37, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -1143,18 +1920,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs38, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs38, alpha_vr - xvmulsp vs1, vs39, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs38, alpha_vr - xvmaddasp vs1, vs39, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs39, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -1168,18 +1985,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs40, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs40, alpha_vr - xvmulsp vs1, vs41, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs40, alpha_vr - xvmaddasp vs1, vs41, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs41, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -1193,18 +2050,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs42, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs42, alpha_vr - xvmulsp vs1, vs43, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs42, alpha_vr - xvmaddasp vs1, vs43, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs43, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -1218,18 +2115,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs44, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs44, alpha_vr - xvmulsp vs1, vs45, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs44, alpha_vr - xvmaddasp vs1, vs45, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs45, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -1243,18 +2180,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs46, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs46, alpha_vr - xvmulsp vs1, vs47, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs46, alpha_vr - xvmaddasp vs1, vs47, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs47, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -1540,16 +2517,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs32, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs32, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs32, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -1561,16 +2556,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs33, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs33, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs33, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -1582,16 +2595,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs34, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs34, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs34, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -1603,16 +2634,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs35, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs35, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs35, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -1624,16 +2673,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs36, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs36, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs36, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -1645,16 +2712,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs37, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs37, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs37, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -1666,16 +2751,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs38, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs38, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs38, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -1687,16 +2790,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs39, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs39, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs39, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -2043,8 +3164,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs32, alpha_r - xsmaddasp vs1, vs33, alpha_r + xsmulsp vs28, vs32, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs33, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -2068,8 +3191,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs34, alpha_r - xsmaddasp vs1, vs35, alpha_r + xsmulsp vs28, vs34, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs35, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -2093,8 +3218,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs36, alpha_r - xsmaddasp vs1, vs37, alpha_r + xsmulsp vs28, vs36, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs37, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -2118,8 +3245,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs38, alpha_r - xsmaddasp vs1, vs39, alpha_r + xsmulsp vs28, vs38, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs39, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -2143,8 +3272,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs40, alpha_r - xsmaddasp vs1, vs41, alpha_r + xsmulsp vs28, vs40, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs41, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -2168,8 +3299,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs42, alpha_r - xsmaddasp vs1, vs43, alpha_r + xsmulsp vs28, vs42, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs43, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -2193,8 +3326,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs44, alpha_r - xsmaddasp vs1, vs45, alpha_r + xsmulsp vs28, vs44, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs45, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -2218,8 +3353,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#else - xsmaddasp vs0, vs46, alpha_r - xsmaddasp vs1, vs47, alpha_r + xsmulsp vs28, vs46, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs47, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -2514,7 +3651,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs32, alpha_r + xsmulsp vs28, vs32, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -2535,7 +3673,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs33, alpha_r + xsmulsp vs28, vs33, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -2556,7 +3695,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs34, alpha_r + xsmulsp vs28, vs34, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -2577,7 +3717,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs35, alpha_r + xsmulsp vs28, vs35, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -2598,7 +3739,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs36, alpha_r + xsmulsp vs28, vs36, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -2619,7 +3761,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs37, alpha_r + xsmulsp vs28, vs37, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -2640,7 +3783,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs38, alpha_r + xsmulsp vs28, vs38, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -2661,7 +3805,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs39, alpha_r + xsmulsp vs28, vs39, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -2952,22 +4097,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
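[Review note] The scalar tails above and below get the matching treatment: the fused multiply-add xsmaddasp is split into an xsmulsp into vs28 followed by xsaddsp, so the alpha product is rounded to single precision before the accumulate instead of inside an FMA. A hedged one-function sketch (gemm_tail_update is an illustrative name):

    /* xsmaddasp c, acc, alpha  becomes an explicit mul then add. */
    static float gemm_tail_update(float c, float acc, float alpha)
    {
        float t = acc * alpha;   /* xsmulsp vs28, vsNN, alpha_r */
        return c + t;            /* xsaddsp vs0,  vs0,  vs28    */
    }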
#endif + + stxvw4x vs32, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs32, alpha_vr - xvmulsp vs1, vs33, alpha_vr - xvmulsp vs2, vs34, alpha_vr - xvmulsp vs3, vs35, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs32, alpha_vr - xvmaddasp vs1, vs33, alpha_vr - xvmaddasp vs2, vs34, alpha_vr - xvmaddasp vs3, vs35, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs33, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs34, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs35, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -2985,22 +4214,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs36, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs36, alpha_vr - xvmulsp vs1, vs37, alpha_vr - xvmulsp vs2, vs38, alpha_vr - xvmulsp vs3, vs39, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs36, alpha_vr - xvmaddasp vs1, vs37, alpha_vr - xvmaddasp vs2, vs38, alpha_vr - xvmaddasp vs3, vs39, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs37, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs38, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs39, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -3018,22 +4331,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs40, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs40, alpha_vr - xvmulsp vs1, vs41, alpha_vr - xvmulsp vs2, vs42, alpha_vr - xvmulsp vs3, vs43, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs40, alpha_vr - xvmaddasp vs1, vs41, alpha_vr - xvmaddasp vs2, vs42, alpha_vr - xvmaddasp vs3, vs43, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs41, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs42, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs43, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -3051,22 +4448,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs44, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs44, alpha_vr - xvmulsp vs1, vs45, alpha_vr - xvmulsp vs2, vs46, alpha_vr - xvmulsp vs3, vs47, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs44, alpha_vr - xvmaddasp vs1, vs45, alpha_vr - xvmaddasp vs2, vs46, alpha_vr - xvmaddasp vs3, vs47, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs45, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs46, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs47, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -3295,18 +4776,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs32, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs32, alpha_vr - xvmulsp vs1, vs33, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs32, alpha_vr - xvmaddasp vs1, vs33, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs33, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -3320,18 +4841,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs34, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs34, alpha_vr - xvmulsp vs1, vs35, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs34, alpha_vr - xvmaddasp vs1, vs35, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs35, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -3345,18 +4906,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs36, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs36, alpha_vr - xvmulsp vs1, vs37, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs36, alpha_vr - xvmaddasp vs1, vs37, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs37, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -3370,18 +4971,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs38, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs38, alpha_vr - xvmulsp vs1, vs39, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs38, alpha_vr - xvmaddasp vs1, vs39, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs39, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -3577,16 +5218,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs32, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs32, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs32, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -3598,16 +5257,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs33, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs33, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs33, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -3619,16 +5296,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs34, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs34, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs34, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -3640,16 +5335,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs35, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs35, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs35, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -3882,8 +5595,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs32, alpha_r - xsmaddasp vs1, vs33, alpha_r + xsmulsp vs28, vs32, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs33, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -3907,8 +5622,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs34, alpha_r - xsmaddasp vs1, vs35, alpha_r + xsmulsp vs28, vs34, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs35, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -3932,8 +5649,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs36, alpha_r - xsmaddasp vs1, vs37, alpha_r + xsmulsp vs28, vs36, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs37, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -3957,8 +5676,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs38, alpha_r - xsmaddasp vs1, vs39, alpha_r + xsmulsp vs28, vs38, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs39, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -4163,7 +5884,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs32, alpha_r + xsmulsp vs28, vs32, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -4184,7 +5906,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#else - xsmaddasp vs0, vs33, alpha_r + xsmulsp vs28, vs33, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -4205,7 +5928,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs34, alpha_r + xsmulsp vs28, vs34, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -4226,7 +5950,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs35, alpha_r + xsmulsp vs28, vs35, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -4445,22 +6170,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs32, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs32, alpha_vr - xvmulsp vs1, vs33, alpha_vr - xvmulsp vs2, vs34, alpha_vr - xvmulsp vs3, vs35, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs32, alpha_vr - xvmaddasp vs1, vs33, alpha_vr - xvmaddasp vs2, vs34, alpha_vr - xvmaddasp vs3, vs35, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs33, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs34, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs35, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -4478,22 +6287,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs36, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs36, alpha_vr - xvmulsp vs1, vs37, alpha_vr - xvmulsp vs2, vs38, alpha_vr - xvmulsp vs3, vs39, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs36, alpha_vr - xvmaddasp vs1, vs37, alpha_vr - xvmaddasp vs2, vs38, alpha_vr - xvmaddasp vs3, vs39, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs37, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs38, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs39, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -4674,18 +6567,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs32, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs32, alpha_vr - xvmulsp vs1, vs33, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs32, alpha_vr - xvmaddasp vs1, vs33, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs33, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -4699,18 +6632,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs34, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs34, alpha_vr - xvmulsp vs1, vs35, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs34, alpha_vr - xvmaddasp vs1, vs35, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs35, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -4870,16 +6843,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs32, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs32, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs32, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -4891,16 +6882,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif + + stxvw4x vs33, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs33, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs33, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -5085,8 +7094,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs32, alpha_r - xsmaddasp vs1, vs33, alpha_r + xsmulsp vs28, vs32, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs33, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -5110,8 +7121,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs34, alpha_r - xsmaddasp vs1, vs35, alpha_r + xsmulsp vs28, vs34, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs35, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -5280,7 +7293,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs32, alpha_r + xsmulsp vs28, vs32, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -5301,7 +7315,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs33, alpha_r + xsmulsp vs28, vs33, alpha_r + xsaddsp vs0, vs0, vs28 #endif @@ -5484,22 +7499,106 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs32, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs32, alpha_vr - xvmulsp vs1, vs33, alpha_vr - xvmulsp vs2, vs34, alpha_vr - xvmulsp vs3, vs35, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs32, alpha_vr - xvmaddasp vs1, vs33, alpha_vr - xvmaddasp vs2, vs34, alpha_vr - xvmaddasp vs3, vs35, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs33, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + stxvw4x vs34, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs2, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs2, vs2, vs28 +#endif + + stxvw4x vs35, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + 
xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs3, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs3, vs3, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 stxvw4x vs2, o32, T1 @@ -5656,18 +7755,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs32, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs32, alpha_vr - xvmulsp vs1, vs33, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs32, alpha_vr - xvmaddasp vs1, vs33, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + stxvw4x vs33, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + +#ifdef TRMMKERNEL + lxvw4x vs1, o0, TBUFFER +#else + lxvw4x vs28, o0, TBUFFER + xvaddsp vs1, vs1, vs28 +#endif + + + + stxvw4x vs0, o0, T1 stxvw4x vs1, o16, T1 @@ -5809,16 +7948,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + + stxvw4x vs32, o0, TBUFFER + + lxsspx vs4, o0, TBUFFER + lxsspx vs5, o4, TBUFFER + lxsspx vs6, o8, TBUFFER + lxsspx vs7, o12, TBUFFER + + xsmulsp vs4, vs4, alpha_r + xsmulsp vs5, vs5, alpha_r + xsmulsp vs6, vs6, alpha_r + xsmulsp vs7, vs7, alpha_r + + stxsspx vs4, o0, TBUFFER + stxsspx vs5, o4, TBUFFER + stxsspx vs6, o8, TBUFFER + stxsspx vs7, o12, TBUFFER + #ifdef TRMMKERNEL - - xvmulsp vs0, vs32, alpha_vr - + lxvw4x vs0, o0, TBUFFER #else - - xvmaddasp vs0, vs32, alpha_vr - + lxvw4x vs28, o0, TBUFFER + xvaddsp vs0, vs0, vs28 #endif + + + stxvw4x vs0, o0, T1 add T1, T1, LDC @@ -5979,8 +8136,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs32, alpha_r - xsmaddasp vs1, vs33, alpha_r + xsmulsp vs28, vs32, alpha_r + xsaddsp vs0, vs0, vs28 + xsmulsp vs28, vs33, alpha_r + xsaddsp vs1, vs1, vs28 #endif @@ -6131,7 +8290,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else - xsmaddasp vs0, vs32, alpha_r + xsmulsp vs28, vs32, alpha_r + xsaddsp vs0, vs0, vs28 #endif diff --git a/kernel/power/strmm_kernel_16x8_power8.S b/kernel/power/strmm_kernel_16x8_power8.S index 5b1c5ca6b..5e607c58f 100644 --- a/kernel/power/strmm_kernel_16x8_power8.S +++ b/kernel/power/strmm_kernel_16x8_power8.S @@ -26,10 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*****************************************************************************/ /************************************************************************************** -* 2016/03/14 Werner Saar (wernsaar@googlemail.com) +* 2016/03/18 Werner Saar (wernsaar@googlemail.com) * BLASTEST : OK * CTEST : OK * TEST : OK +* LAPACK-TEST : OK **************************************************************************************/ /*********************************************************************/ @@ -81,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifdef __64BIT__ -#define STACKSIZE 320 +#define STACKSIZE 340 #define ALPHA_SP 296(SP) #define FZERO 304(SP) #else @@ -127,10 +128,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #define alpha_r vs30 -#define alpha_vr vs31 #define o0 0 +#define TBUFFER r13 #define o12 r14 #define o4 r15 #define K1 r16 @@ -138,7 +139,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define L r18 #define T1 r19 #define KK r20 -#define KKK 21 +#define KKK r21 #define I r22 #define J r23 #define AO r24 @@ -204,6 +205,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + std r13, 288(SP) #else stw r31, 144(SP) stw r30, 148(SP) @@ -223,6 +225,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r16, 204(SP) stw r15, 208(SP) stw r14, 212(SP) + stw r13, 216(SP) #endif // stfd f1, ALPHA_SP @@ -274,17 +277,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. li o16, 16 li o32, 32 li o48, 48 + addi TBUFFER, SP, 320 addi T1, SP, 300 stfs f1, 0(T1) - stfs f1, 4(T1) - stfs f1, 8(T1) - stfs f1,12(T1) - lxsspx vs28, 0, T1 + lxsspx alpha_r, 0, T1 - xxspltw alpha_r, vs28 , 0 - lxvw4x alpha_vr, 0, T1 @@ -335,6 +334,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + ld r13, 288(SP) #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -354,6 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lwz r16, 204(SP) lwz r15, 208(SP) lwz r14, 212(SP) + lwz r13, 216(SP) #endif addi SP, SP, STACKSIZE diff --git a/kernel/power/strmm_logic_16x8_power8.S b/kernel/power/strmm_logic_16x8_power8.S index 0d6d04858..8ec11f1ef 100644 --- a/kernel/power/strmm_logic_16x8_power8.S +++ b/kernel/power/strmm_logic_16x8_power8.S @@ -26,14 +26,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ /************************************************************************************** -* 2016/03/14 Werner Saar (wernsaar@googlemail.com) +* 2016/03/18 Werner Saar (wernsaar@googlemail.com) * BLASTEST : OK * CTEST : OK * TEST : OK +* LAPACK-TEST : OK **************************************************************************************/ - srawi. J, N, 3 ble .LSTRMM_L8_END diff --git a/param.h b/param.h index 980650e09..370d10b9a 100644 --- a/param.h +++ b/param.h @@ -1977,12 +1977,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ZGEMM_DEFAULT_UNROLL_M 8 #define ZGEMM_DEFAULT_UNROLL_N 2 -#define SGEMM_DEFAULT_P 960 +#define SGEMM_DEFAULT_P 480 #define DGEMM_DEFAULT_P 480 #define CGEMM_DEFAULT_P 480 #define ZGEMM_DEFAULT_P 240 -#define SGEMM_DEFAULT_Q 720 +#define SGEMM_DEFAULT_Q 1440 #define DGEMM_DEFAULT_Q 720 #define CGEMM_DEFAULT_Q 720 #define ZGEMM_DEFAULT_Q 360
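
Note on the change (commentary, not part of the patch): across all of the sgemm/strmm store macros above, the old vector alpha path — `xvmulsp`/`xvmaddasp` against the splatted `alpha_vr` — is replaced by a scalar one. Each 4-float accumulator vector is spilled to a 16-byte scratch slot on the stack (`TBUFFER`, carved out of the enlarged STACKSIZE and addressed through the newly saved r14/r13), its four lanes are rescaled one at a time with the scalar single-precision `xsmulsp`, and the slot is reloaded as a vector: directly into the result for TRMMKERNEL, or merged into the existing C values with `xvaddsp` otherwise. A minimal C sketch of what one such block computes (function and variable names here are illustrative, not taken from the source):

```c
#include <string.h>

/* Rough model of one TBUFFER round-trip from the patch:
 * spill a 4-lane accumulator, scale each lane with scalar
 * single-precision arithmetic, then merge into C.
 * 'trmm' selects the TRMMKERNEL path (overwrite C) versus
 * the plain GEMM path (accumulate into C). */
static void scale_and_store(float c[4], const float acc[4],
                            float alpha, int trmm)
{
    float tbuffer[4];                   /* stxvw4x vsNN, o0, TBUFFER      */
    memcpy(tbuffer, acc, sizeof tbuffer);

    for (int i = 0; i < 4; i++)         /* lxsspx / xsmulsp / stxsspx x4  */
        tbuffer[i] = tbuffer[i] * alpha;

    if (trmm) {                         /* lxvw4x vs0, o0, TBUFFER        */
        memcpy(c, tbuffer, sizeof tbuffer);
    } else {                            /* lxvw4x vs28; xvaddsp vs0,vs0,vs28 */
        for (int i = 0; i < 4; i++)
            c[i] = c[i] + tbuffer[i];
    }
}
```

Judging by the `LAPACK-TEST : OK` lines added to the file headers, the detour through memory appears intended to force plain per-lane single-precision multiply-then-add rounding (no fused multiply-add), trading some speed for test conformance; the edge-case scalar stores get the analogous `xsmaddasp` → `xsmulsp` + `xsaddsp` split. The halved SGEMM_DEFAULT_P and doubled SGEMM_DEFAULT_Q in param.h presumably retune the cache blocking for the same 16x8 kernel.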