diff --git a/kernel/zarch/gemm8x4V.S b/kernel/zarch/gemm8x4V.S index 0b4bc73c5..27fd5f57b 100644 --- a/kernel/zarch/gemm8x4V.S +++ b/kernel/zarch/gemm8x4V.S @@ -73,16 +73,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ASSEMBLER #include "common.h" -/************** Notes ON IBM abi and IBM assembly********************************************** -* General registers r0 and r1 should be used internally whenever possible -* General registers r2 to r5 should be second choice -* General registers r12 to r15 should only be used for their standard function. -* r0 should not be used as address disp register +/* #BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc ##bm=r2,bn=r3, bk=r4, alpha=f0,ba=r5,bb=r6,stack[160] ,ldc=stack[168] **********************************************************************************************/ - +/*Note: r0 cannot be used as a base or index register in address operands */ #define BM %r2 #define BM_CUR %r0 @@ -109,7 +105,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE -stmg %r6,%r12,40(%r15) +stmg %r6,%r12,48(%r15) lg CIJ, 160(%r15) lg LOCAL_VAR1, 168(%r15) srlg BN_CUR,BN,2 @@ -606,7 +602,7 @@ la B,0(B,LOCAL_VAR2) /*refresh B=B+Bk*1*sizeof(double) */ ALIGN_2 .L_FUNC_END: /*end*/ -lmg %r6,%r12,40(%r15) +lmg %r6,%r12,48(%r15) br %r14 .end diff --git a/kernel/zarch/trmm8x4V.S b/kernel/zarch/trmm8x4V.S index 8e6a03c16..4da113ff3 100644 --- a/kernel/zarch/trmm8x4V.S +++ b/kernel/zarch/trmm8x4V.S @@ -73,17 +73,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ASSEMBLER #include "common.h" -/************** Notes ON IBM abi and IBM assembly********************************************** -* General registers r0 and r1 should be used internally whenever possible -* General registers r2 to r5 should be second choice -* General registers r12 to r15 should only be used for their standard function. 
-* r0 should not be used as address disp register + +/* #BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc ##bm=r2,bn=r3, bk=r4, alpha=f0,ba=r5,bb=r6,stack[160] ,ldc=stack[168] offset=stack[176] **********************************************************************************************/ - +/*Note: r0 cannot be used as a base or index register in address operands */ #define BM %r2 #define BM_CUR %r0 @@ -131,16 +128,16 @@ offset=stack[176] /***********************************DGEMM***********************************************************/ PROLOGUE -#if defined(TRMMKERNEL) -stmg %r6,%r13,40(%r15) +#if defined(TRMMKERNEL) + std OFFSET,40(%r15) + stmg %r6,%r13,48(%r15) #else -stmg %r6,%r12,40(%r15) + stmg %r6,%r12,48(%r15) #endif lg CIJ, 160(%r15) lg LOCAL_VAR1, 168(%r15) #if defined(TRMMKERNEL) lg OFF,176(%r15) -std OFFSET,32(%r15) ldgr OFFSET ,OFF #endif srlg BN_CUR,BN,2 @@ -861,10 +858,10 @@ ALIGN_2 .L_FUNC_END: /*end*/ #if defined(TRMMKERNEL) -ld %f8,32(%r15) -lmg %r6,%r13,40(%r15) + ld OFFSET,40(%r15) + lmg %r6,%r13,48(%r15) #else -lmg %r6,%r12,40(%r15) + lmg %r6,%r12,48(%r15) #endif br %r14 .end diff --git a/kernel/zarch/ztrmm4x4V.S b/kernel/zarch/ztrmm4x4V.S index a838ea7fb..52ee15f06 100644 --- a/kernel/zarch/ztrmm4x4V.S +++ b/kernel/zarch/ztrmm4x4V.S @@ -73,11 +73,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ASSEMBLER #include "common.h" -/************** Notes ON IBM abi and IBM assembly********************************************** -* General registers r0 and r1 should be used internally whenever possible -* General registers r2 to r5 should be second choice -* General registers r12 to r15 should only be used for their standard function. 
-* r0 should not be used as address disp register +/* + BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alphar,FLOAT alphai,FLOAT* ba,FLOAT* bb, FLOAT* C,BLASLONG ldc, BLASLONG offset) @@ -85,7 +82,7 @@ BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alphar,FLOAT alphai,FLOAT* ba,FLOAT* b offset=stack[176] **********************************************************************************************/ - +/*Note: r0 cannot be used as a base or index register in address operands */ #define BM %r2 #define BM_CUR %r0 @@ -118,21 +115,21 @@ offset=stack[176] /***********************************ZGEMM**4x4*******************************************************/ PROLOGUE -#if defined(TRMMKERNEL) +#if defined(TRMMKERNEL) + std OFFSET ,40(%r15) stmg %r6,%r13,48(%r15) #else stmg %r6,%r12,48(%r15) #endif -std %f11,8(%r15) -std %f10,16(%r15) -std %f9,24(%r15) -std %f12,32(%r15) +std %f9, 128(%r15) +std %f10,136(%r15) +std %f11,144(%r15) +std %f12,152(%r15) lg CIJ, 160(%r15) lg LOCAL_VAR1, 168(%r15) #if defined(TRMMKERNEL) lg OFF,176(%r15) - std OFFSET,40(%r15) ldgr OFFSET ,OFF #endif srlg BN_CUR,BN,2 @@ -709,16 +706,18 @@ la B,0(B,LOCAL_VAR2) /*refresh B=B+Bk*1*sizeof(complex) */ ALIGN_2 .L_FUNC_END: /*end*/ -ld %f11,8(%r15) -ld %f10,16(%r15) -ld %f9,24(%r15) -ld %f12,32(%r15) + + #if defined(TRMMKERNEL) ld OFFSET,40(%r15) lmg %r6,%r13,48(%r15) #else lmg %r6,%r12,48(%r15) #endif +ld %f9, 128(%r15) +ld %f10,136(%r15) +ld %f11,144(%r15) +ld %f12,152(%r15) br %r14 .end