Do not use vsub to clear register values, since subtracting a register from itself yields NaN instead of zero when the register holds NaN or infinity.
This commit is contained in:
parent
5f2fa15e04
commit
594b9f4c73
|
@ -368,11 +368,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.align 5
|
||||
|
||||
#if defined(DOUBLE)
|
||||
vsub.f64 d0 , d0 , d0
|
||||
vsub.f64 d1 , d1 , d1
|
||||
vldr.f64 d0 , =0
|
||||
vldr.f64 d1 , =0
|
||||
#else
|
||||
vsub.f32 s0 , s0 , s0
|
||||
vsub.f32 s1 , s1 , s1
|
||||
vldr.f32 s0 , =0
|
||||
vldr.f32 s1 , =0
|
||||
#endif
|
||||
|
||||
cmp N, #0
|
||||
|
|
|
@ -188,10 +188,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
mov Y, OLD_Y
|
||||
ldr INC_Y, OLD_INC_Y
|
||||
|
||||
vsub.f32 s0 , s0 , s0
|
||||
vsub.f32 s1 , s1 , s1
|
||||
vsub.f32 s2 , s2 , s2
|
||||
vsub.f32 s3 , s3 , s3
|
||||
vldr.f32 s0 , =0
|
||||
vldr.f32 s1 , =0
|
||||
vldr.f32 s2 , =0
|
||||
vldr.f32 s3 , =0
|
||||
|
||||
cmp N, #0
|
||||
ble cdot_kernel_L999
|
||||
|
|
|
@ -138,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -340,7 +340,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
vmov.f32 s12, s8
|
||||
vmov.f32 s13, s8
|
||||
|
@ -514,7 +514,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -681,7 +681,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
|
||||
.endm
|
||||
|
|
|
@ -147,7 +147,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -368,7 +367,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -550,7 +548,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -730,7 +727,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s24, s16
|
||||
vmov.f32 s25, s16
|
||||
|
@ -1008,9 +1004,12 @@ cgemm_kernel_L2_M2_32:
|
|||
|
||||
b cgemm_kernel_L2_M2_44
|
||||
|
||||
cgemm_f32zero:
|
||||
.word 0x00000000
|
||||
|
||||
cgemm_kernel_L2_M2_40:
|
||||
|
||||
vldr.f32 s16 , cgemm_f32zero
|
||||
INIT2x2
|
||||
|
||||
|
||||
|
@ -1044,6 +1043,7 @@ cgemm_kernel_L2_M1_BEGIN:
|
|||
|
||||
cgemm_kernel_L2_M1_20:
|
||||
|
||||
vldr.f32 s16 , cgemm_f32zero
|
||||
INIT1x2
|
||||
|
||||
mov BO, BC
|
||||
|
@ -1219,6 +1219,7 @@ cgemm_kernel_L1_M2_32:
|
|||
|
||||
cgemm_kernel_L1_M2_40:
|
||||
|
||||
vldr.f32 s16 , =0
|
||||
INIT2x1
|
||||
|
||||
|
||||
|
@ -1252,6 +1253,7 @@ cgemm_kernel_L1_M1_BEGIN:
|
|||
|
||||
cgemm_kernel_L1_M1_20:
|
||||
|
||||
vldr.f32 s16 , =0
|
||||
INIT1x1
|
||||
|
||||
mov BO, BC
|
||||
|
|
|
@ -117,7 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro INIT_F4
|
||||
|
||||
pld [ YO, #Y_PRE ]
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
|
||||
.endm
|
||||
|
@ -267,7 +267,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S4
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
|
||||
.endm
|
||||
|
|
|
@ -116,10 +116,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F2
|
||||
|
||||
vsub.f32 s12, s12, s12
|
||||
vsub.f32 s13, s13, s13
|
||||
vsub.f32 s14, s14, s14
|
||||
vsub.f32 s15, s15, s15
|
||||
vldr.f32 s12, =0
|
||||
vldr.f32 s13, =0
|
||||
vldr.f32 s14, =0
|
||||
vldr.f32 s15, =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -172,8 +172,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f32 s12, s12, s12
|
||||
vsub.f32 s13, s13, s13
|
||||
vldr.f32 s12, =0
|
||||
vldr.f32 s13, =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -215,10 +215,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S2
|
||||
|
||||
vsub.f32 s12, s12, s12
|
||||
vsub.f32 s13, s13, s13
|
||||
vsub.f32 s14, s14, s14
|
||||
vsub.f32 s15, s15, s15
|
||||
vldr.f32 s12, =0
|
||||
vldr.f32 s13, =0
|
||||
vldr.f32 s14, =0
|
||||
vldr.f32 s15, =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -281,8 +281,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f32 s12, s12, s12
|
||||
vsub.f32 s13, s13, s13
|
||||
vldr.f32 s12, =0
|
||||
vldr.f32 s13, =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -136,7 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -301,10 +301,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
flds s0, ALPHA_R
|
||||
flds s1, ALPHA_I
|
||||
|
||||
vsub.f32 s4, s4, s4
|
||||
vsub.f32 s5, s5, s5
|
||||
vsub.f32 s6, s6, s6
|
||||
vsub.f32 s7, s7, s7
|
||||
vldr.f32 s4, =0
|
||||
vldr.f32 s5, =0
|
||||
vldr.f32 s6, =0
|
||||
vldr.f32 s7, =0
|
||||
|
||||
FMAC_R1 s4 , s0 , s8
|
||||
FMAC_I1 s5 , s0 , s9
|
||||
|
@ -318,10 +318,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
fstmias CO1, { s4 - s7 }
|
||||
|
||||
vsub.f32 s4, s4, s4
|
||||
vsub.f32 s5, s5, s5
|
||||
vsub.f32 s6, s6, s6
|
||||
vsub.f32 s7, s7, s7
|
||||
vldr.f32 s4, =0
|
||||
vldr.f32 s5, =0
|
||||
vldr.f32 s6, =0
|
||||
vldr.f32 s7, =0
|
||||
|
||||
FMAC_R1 s4 , s0 , s12
|
||||
FMAC_I1 s5 , s0 , s13
|
||||
|
@ -343,7 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
vmov.f32 s12, s8
|
||||
vmov.f32 s13, s8
|
||||
|
@ -490,8 +490,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
flds s0, ALPHA_R
|
||||
flds s1, ALPHA_I
|
||||
|
||||
vsub.f32 s4, s4, s4
|
||||
vsub.f32 s5, s5, s5
|
||||
vldr.f32 s4, =0
|
||||
vldr.f32 s5, =0
|
||||
|
||||
FMAC_R1 s4 , s0 , s8
|
||||
FMAC_I1 s5 , s0 , s9
|
||||
|
@ -500,8 +500,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
fstmias CO1, { s4 - s5 }
|
||||
|
||||
vsub.f32 s4, s4, s4
|
||||
vsub.f32 s5, s5, s5
|
||||
vldr.f32 s4, =0
|
||||
vldr.f32 s5, =0
|
||||
|
||||
FMAC_R1 s4 , s0 , s12
|
||||
FMAC_I1 s5 , s0 , s13
|
||||
|
@ -519,7 +519,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -663,10 +663,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
flds s0, ALPHA_R
|
||||
flds s1, ALPHA_I
|
||||
|
||||
vsub.f32 s4, s4, s4
|
||||
vsub.f32 s5, s5, s5
|
||||
vsub.f32 s6, s6, s6
|
||||
vsub.f32 s7, s7, s7
|
||||
vldr.f32 s4, =0
|
||||
vldr.f32 s5, =0
|
||||
vldr.f32 s6, =0
|
||||
vldr.f32 s7, =0
|
||||
|
||||
FMAC_R1 s4 , s0 , s8
|
||||
FMAC_I1 s5 , s0 , s9
|
||||
|
@ -689,7 +689,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
|
||||
.endm
|
||||
|
@ -795,8 +795,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
flds s0, ALPHA_R
|
||||
flds s1, ALPHA_I
|
||||
|
||||
vsub.f32 s4, s4, s4
|
||||
vsub.f32 s5, s5, s5
|
||||
vldr.f32 s4, =0
|
||||
vldr.f32 s5, =0
|
||||
|
||||
FMAC_R1 s4 , s0 , s8
|
||||
FMAC_I1 s5 , s0 , s9
|
||||
|
|
|
@ -134,7 +134,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -351,7 +350,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -529,7 +527,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -706,7 +703,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s24, s16
|
||||
vmov.f32 s25, s16
|
||||
|
@ -1020,9 +1016,12 @@ _L2_M2_32:
|
|||
|
||||
b _L2_M2_44
|
||||
|
||||
ctrmm_f32zero:
|
||||
.word 0x00000000
|
||||
|
||||
_L2_M2_40:
|
||||
|
||||
vldr.f32 s16 , ctrmm_f32zero
|
||||
INIT2x2
|
||||
|
||||
|
||||
|
@ -1074,6 +1073,7 @@ _L2_M1_BEGIN:
|
|||
|
||||
_L2_M1_20:
|
||||
|
||||
vldr.f32 s16 , ctrmm_f32zero
|
||||
INIT1x2
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || \
|
||||
|
@ -1337,6 +1337,7 @@ _L1_M2_32:
|
|||
|
||||
_L1_M2_40:
|
||||
|
||||
vldr.f32 s16 , =0
|
||||
INIT2x1
|
||||
|
||||
|
||||
|
@ -1389,6 +1390,7 @@ _L1_M1_BEGIN:
|
|||
|
||||
_L1_M1_20:
|
||||
|
||||
vldr.f32 s16 , =0
|
||||
INIT1x1
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || \
|
||||
|
|
|
@ -152,8 +152,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
mov Y, OLD_Y
|
||||
ldr INC_Y, OLD_INC_Y
|
||||
|
||||
vsub.f64 d0 , d0 , d0
|
||||
vsub.f64 d1 , d1 , d1
|
||||
vldr.f64 d0 , =0
|
||||
vldr.f64 d1 , =0
|
||||
|
||||
cmp N, #0
|
||||
ble ddot_kernel_L999
|
||||
|
|
|
@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x2
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9, d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -173,7 +173,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9, d8
|
||||
vmov.f64 d12, d8
|
||||
vmov.f64 d13, d8
|
||||
|
@ -233,7 +233,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d12, d8
|
||||
|
||||
.endm
|
||||
|
@ -283,7 +283,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9, d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -338,7 +338,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
|
||||
.endm
|
||||
|
@ -380,7 +380,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -102,7 +102,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x4
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -376,7 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x4
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d21, d16
|
||||
|
@ -470,7 +470,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x4
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d24, d16
|
||||
vmov.f64 d28, d16
|
||||
|
@ -533,7 +533,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -617,7 +617,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d21, d16
|
||||
|
@ -678,7 +678,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d20, d16
|
||||
|
||||
.endm
|
||||
|
@ -723,7 +723,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -782,7 +782,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
|
||||
.endm
|
||||
|
@ -826,7 +826,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -90,7 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x2
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9, d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -165,7 +165,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9, d8
|
||||
vmov.f64 d12, d8
|
||||
vmov.f64 d13, d8
|
||||
|
@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d12, d8
|
||||
|
||||
.endm
|
||||
|
@ -268,7 +268,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9, d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -318,7 +318,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
|
||||
.endm
|
||||
|
@ -357,7 +357,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -89,7 +89,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x4
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -386,7 +386,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x4
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d21, d16
|
||||
|
@ -468,7 +468,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x4
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d24, d16
|
||||
vmov.f64 d28, d16
|
||||
|
@ -527,7 +527,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -601,7 +601,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d21, d16
|
||||
|
@ -656,7 +656,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d20, d16
|
||||
|
||||
.endm
|
||||
|
@ -699,7 +699,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -753,7 +753,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
|
||||
.endm
|
||||
|
@ -794,7 +794,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
pld [ YO , #Y_PRE ]
|
||||
pld [ YO , #Y_PRE+32 ]
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
vmov.f64 d10 , d8
|
||||
vmov.f64 d11 , d8
|
||||
|
@ -158,7 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f64 d12 , d12 , d12
|
||||
vldr.f64 d12 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -185,7 +185,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S4
|
||||
|
||||
vsub.f64 d12 , d12 , d12
|
||||
vldr.f64 d12 , =0
|
||||
vmov.f64 d13 , d12
|
||||
vmov.f64 d14 , d12
|
||||
vmov.f64 d15 , d12
|
||||
|
@ -245,7 +245,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f64 d12 , d12 , d12
|
||||
vldr.f64 d12 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -279,7 +279,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
pld [ YO , #Y_PRE ]
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
vmov.f32 s10 , s8
|
||||
vmov.f32 s11 , s8
|
||||
|
@ -357,7 +357,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f32 s12 , s12 , s12
|
||||
vldr.f32 s12 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S4
|
||||
|
||||
vsub.f32 s12 , s12 , s12
|
||||
vldr.f32 s12 , =0
|
||||
vmov.f32 s13 , s12
|
||||
vmov.f32 s14 , s12
|
||||
vmov.f32 s15 , s12
|
||||
|
@ -445,7 +445,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f32 s12 , s12 , s12
|
||||
vldr.f32 s12 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
pld [ YO , #Y_PRE ]
|
||||
pld [ YO , #Y_PRE+32 ]
|
||||
|
||||
vsub.f64 d24 , d24 , d24
|
||||
vldr.f64 d24 , =0
|
||||
vmov.f64 d25 , d24
|
||||
vmov.f64 d26 , d24
|
||||
vmov.f64 d27 , d24
|
||||
|
@ -147,7 +147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f64 d24 , d24 , d24
|
||||
vldr.f64 d24 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -175,7 +175,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S8
|
||||
|
||||
vsub.f64 d24 , d24 , d24
|
||||
vldr.f64 d24 , =0
|
||||
vmov.f64 d25 , d24
|
||||
vmov.f64 d26 , d24
|
||||
vmov.f64 d27 , d24
|
||||
|
@ -269,7 +269,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f64 d24 , d24 , d24
|
||||
vldr.f64 d24 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -302,7 +302,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
pld [ YO , #Y_PRE ]
|
||||
|
||||
vsub.f32 s24 , s24 , s24
|
||||
vldr.f32 s24 , =0
|
||||
vmov.f32 s25 , s24
|
||||
vmov.f32 s26 , s24
|
||||
vmov.f32 s27 , s24
|
||||
|
@ -368,7 +368,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f32 s24 , s24 , s24
|
||||
vldr.f32 s24 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -396,7 +396,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S8
|
||||
|
||||
vsub.f32 s24 , s24 , s24
|
||||
vldr.f32 s24 , =0
|
||||
vmov.f32 s25 , s24
|
||||
vmov.f32 s26 , s24
|
||||
vmov.f32 s27 , s24
|
||||
|
@ -489,7 +489,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f32 s24 , s24 , s24
|
||||
vldr.f32 s24 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -75,8 +75,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F2
|
||||
|
||||
vsub.f64 d2 , d2 , d2
|
||||
vsub.f64 d3 , d3 , d3
|
||||
vldr.f64 d2 , =0
|
||||
vldr.f64 d3 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -123,7 +123,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f64 d2 , d2 , d2
|
||||
vldr.f64 d2 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -160,8 +160,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S2
|
||||
|
||||
vsub.f64 d2 , d2 , d2
|
||||
vsub.f64 d3 , d3 , d3
|
||||
vldr.f64 d2 , =0
|
||||
vldr.f64 d3 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -224,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f64 d2 , d2 , d2
|
||||
vldr.f64 d2 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -276,8 +276,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F2
|
||||
|
||||
vsub.f32 s2 , s2 , s2
|
||||
vsub.f32 s3 , s3 , s3
|
||||
vldr.f32 s2 , =0
|
||||
vldr.f32 s3 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -321,7 +321,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f32 s2 , s2 , s2
|
||||
vldr.f32 s2 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -356,8 +356,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S2
|
||||
|
||||
vsub.f32 s2 , s2 , s2
|
||||
vsub.f32 s3 , s3 , s3
|
||||
vldr.f32 s2 , =0
|
||||
vldr.f32 s3 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -418,7 +418,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f32 s2 , s2 , s2
|
||||
vldr.f32 s2 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -75,8 +75,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F2
|
||||
|
||||
vsub.f64 d4 , d4 , d4
|
||||
vsub.f64 d5 , d5 , d5
|
||||
vldr.f64 d4 , =0
|
||||
vldr.f64 d5 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -123,8 +123,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S2
|
||||
|
||||
vsub.f64 d4 , d4 , d4
|
||||
vsub.f64 d5 , d5 , d5
|
||||
vldr.f64 d4 , =0
|
||||
vldr.f64 d5 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -183,7 +183,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f64 d4 , d4 , d4
|
||||
vldr.f64 d4 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f64 d4 , d4 , d4
|
||||
vldr.f64 d4 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -268,8 +268,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F2
|
||||
|
||||
vsub.f32 s4 , s4 , s4
|
||||
vsub.f32 s5 , s5 , s5
|
||||
vldr.f32 s4 , =0
|
||||
vldr.f32 s5 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -313,8 +313,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S2
|
||||
|
||||
vsub.f32 s4 , s4 , s4
|
||||
vsub.f32 s5 , s5 , s5
|
||||
vldr.f32 s4 , =0
|
||||
vldr.f32 s5 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -371,7 +371,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f32 s4 , s4 , s4
|
||||
vldr.f32 s4 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -406,7 +406,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f32 s4 , s4 , s4
|
||||
vldr.f32 s4 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -342,9 +342,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
push {r4}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
vsub.f64 d0 , d0 , d0
|
||||
vldr.f64 d0 , =0
|
||||
#else
|
||||
vsub.f32 s0 , s0 , s0
|
||||
vldr.f32 s0 , =0
|
||||
#endif
|
||||
mov INDEX, #0
|
||||
|
||||
|
|
|
@ -446,12 +446,12 @@ nrm2_begin:
|
|||
#if defined(COMPLEX)
|
||||
|
||||
#if defined(DOUBLE)
|
||||
vsub.f64 d0 , d0 , d0 // scale=0.0
|
||||
vldr.f64 d0 , =0 // scale=0.0
|
||||
vldr.64 d1 , znrm2_one // ssq=1.0
|
||||
vmov.f64 d7 , d1 // value 1.0
|
||||
vmov.f64 d6 , d0 // value 0.0
|
||||
#else
|
||||
vsub.f32 s0 , s0 , s0 // scale=0.0
|
||||
vldr.f32 s0 , =0 // scale=0.0
|
||||
vldr.32 s1 , cnrm2_one // ssq=1.0
|
||||
vmov.f32 s7 , s1 // value 1.0
|
||||
vmov.f32 s6 , s0 // value 0.0
|
||||
|
@ -460,12 +460,12 @@ nrm2_begin:
|
|||
#else
|
||||
|
||||
#if defined(DOUBLE)
|
||||
vsub.f64 d0 , d0 , d0 // scale=0.0
|
||||
vldr.f64 d0 , =0 // scale=0.0
|
||||
vldr.64 d1 , dnrm2_one // ssq=1.0
|
||||
vmov.f64 d7 , d1 // value 1.0
|
||||
vmov.f64 d6 , d0 // value 0.0
|
||||
#else
|
||||
vsub.f32 s0 , s0 , s0 // scale=0.0
|
||||
vldr.f32 s0 , =0 // scale=0.0
|
||||
vldr.32 s1 , snrm2_one // ssq=1.0
|
||||
vmov.f32 s7 , s1 // value 1.0
|
||||
vmov.f32 s6 , s0 // value 0.0
|
||||
|
|
|
@ -400,17 +400,22 @@ KERNEL_S1_END_\@:
|
|||
* End of macro definitions
|
||||
**************************************************************************************/
|
||||
|
||||
nrm2_zeros:
|
||||
.align 5
|
||||
.word 0x00000000
|
||||
.word 0x00000000
|
||||
|
||||
PROLOGUE
|
||||
|
||||
.align 5
|
||||
|
||||
#if defined(DOUBLE)
|
||||
vsub.f64 d0 , d0 , d0 // scale=0.0
|
||||
vldr.f64 d0 , nrm2_zeros // scale=0.0
|
||||
vmov.f64 d1 , #1.0 // ssq=1.0
|
||||
vmov.f64 d7 , d1 // value 1.0
|
||||
vmov.f64 d6 , d0 // value 0.0
|
||||
#else
|
||||
vsub.f32 s0 , s0 , s0 // scale=0.0
|
||||
vldr.f32 s0 , nrm2_zeros // scale=0.0
|
||||
vmov.f32 s1 , #1.0 // ssq=1.0
|
||||
vmov.f32 s7 , s1 // value 1.0
|
||||
vmov.f32 s6 , s0 // value 0.0
|
||||
|
|
|
@ -242,13 +242,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(DSDOT)
|
||||
|
||||
vsub.f64 d0 , d0 , d0
|
||||
vsub.f64 d1 , d1 , d1
|
||||
vldr.f64 d0 , =0
|
||||
vldr.f64 d1 , =0
|
||||
|
||||
#else
|
||||
|
||||
vsub.f32 s0 , s0 , s0
|
||||
vsub.f32 s1 , s1 , s1
|
||||
vldr.f32 s0 , =0
|
||||
vldr.f32 s1 , =0
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x2
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9, s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -161,7 +161,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9, s8
|
||||
vmov.f32 s12, s8
|
||||
vmov.f32 s13, s8
|
||||
|
@ -221,7 +221,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s12, s8
|
||||
|
||||
.endm
|
||||
|
@ -271,7 +271,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9, s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -326,7 +326,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
|
||||
.endm
|
||||
|
@ -368,7 +368,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -102,7 +102,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -349,7 +348,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -443,7 +441,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s24, s16
|
||||
vmov.f32 s28, s16
|
||||
|
@ -506,7 +503,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -590,7 +586,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -651,7 +646,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s20, s16
|
||||
|
||||
.endm
|
||||
|
@ -696,7 +690,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -755,7 +748,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
|
||||
.endm
|
||||
|
@ -797,13 +789,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
.macro KERNEL1x1_SUB
|
||||
|
||||
|
@ -930,9 +915,24 @@ sgemm_kernel_L4_M4_32:
|
|||
|
||||
b sgemm_kernel_L4_M4_44
|
||||
|
||||
/* Note on loading zero into a fp register
|
||||
* vsub (x - x) doesn't work since it yields NaN, not zero, when x is NaN or infinity
|
||||
* vmov.Fn doesn't work with 0 since 0.0 cannot be encoded as a VFP immediate
|
||||
* vmov.In and veor are NEON-only instructions
|
||||
* vldr , =0 doesn't work since the function is larger than 2KB
|
||||
* and the assembler is not able to insert a constant pool inside
|
||||
* the function body.
|
||||
*
|
||||
* Therefore, the best way I've found is to manually create multiple
|
||||
* copies of the zero constant and `vldr` from different ones depending
|
||||
* on where the instruction is.
|
||||
*/
|
||||
sgemm_f32zero:
|
||||
.word 0x00000000
|
||||
|
||||
sgemm_kernel_L4_M4_40:
|
||||
|
||||
vldr.f32 s16 , sgemm_f32zero
|
||||
INIT4x4
|
||||
|
||||
|
||||
|
@ -969,6 +969,7 @@ sgemm_kernel_L4_M2_BEGIN:
|
|||
|
||||
sgemm_kernel_L4_M2_20:
|
||||
|
||||
vldr.f32 s16 , sgemm_f32zero
|
||||
INIT2x4
|
||||
|
||||
mov BO, BC
|
||||
|
@ -1014,9 +1015,14 @@ sgemm_kernel_L4_M1_BEGIN:
|
|||
|
||||
tst I, #1 // I = I % 2
|
||||
ble sgemm_kernel_L4_END
|
||||
b sgemm_kernel_L4_M1_20
|
||||
|
||||
sgemm_f32zero4:
|
||||
.word 0x00000000
|
||||
|
||||
sgemm_kernel_L4_M1_20:
|
||||
|
||||
vldr.f32 s16 , sgemm_f32zero4
|
||||
INIT1x4
|
||||
|
||||
mov BO, BC
|
||||
|
@ -1100,6 +1106,7 @@ sgemm_kernel_L2_M4_BEGIN:
|
|||
|
||||
sgemm_kernel_L2_M4_20:
|
||||
|
||||
vldr.f32 s16 , sgemm_f32zero3
|
||||
INIT4x2
|
||||
|
||||
mov BO, BC
|
||||
|
@ -1121,7 +1128,6 @@ sgemm_kernel_L2_M4_22:
|
|||
subs L, L, #1
|
||||
bgt sgemm_kernel_L2_M4_22
|
||||
|
||||
|
||||
sgemm_kernel_L2_M4_40:
|
||||
|
||||
ands L , K1, #7 // L = L % 8
|
||||
|
@ -1142,7 +1148,10 @@ sgemm_kernel_L2_M4_END:
|
|||
|
||||
subs I, I, #1
|
||||
bgt sgemm_kernel_L2_M4_20
|
||||
b sgemm_kernel_L2_M2_BEGIN
|
||||
|
||||
sgemm_f32zero3:
|
||||
.word 0x00000000
|
||||
|
||||
sgemm_kernel_L2_M2_BEGIN:
|
||||
|
||||
|
@ -1155,6 +1164,7 @@ sgemm_kernel_L2_M2_BEGIN:
|
|||
|
||||
sgemm_kernel_L2_M2_20:
|
||||
|
||||
vldr.f32 s16 , sgemm_f32zero3
|
||||
INIT2x2
|
||||
|
||||
mov BO, BC
|
||||
|
@ -1203,6 +1213,7 @@ sgemm_kernel_L2_M1_BEGIN:
|
|||
|
||||
sgemm_kernel_L2_M1_20:
|
||||
|
||||
vldr.f32 s16 , sgemm_f32zero3
|
||||
INIT1x2
|
||||
|
||||
mov BO, BC
|
||||
|
@ -1278,6 +1289,7 @@ sgemm_kernel_L1_M4_BEGIN:
|
|||
|
||||
sgemm_kernel_L1_M4_20:
|
||||
|
||||
vldr.f32 s16 , sgemm_f32zero3
|
||||
INIT4x1
|
||||
|
||||
mov BO, BC
|
||||
|
@ -1333,6 +1345,7 @@ sgemm_kernel_L1_M2_BEGIN:
|
|||
|
||||
sgemm_kernel_L1_M2_20:
|
||||
|
||||
vldr.f32 s16 , sgemm_f32zero2
|
||||
INIT2x1
|
||||
|
||||
mov BO, BC
|
||||
|
@ -1381,7 +1394,7 @@ sgemm_kernel_L1_M1_BEGIN:
|
|||
|
||||
sgemm_kernel_L1_M1_20:
|
||||
|
||||
INIT1x1
|
||||
vldr.f32 s16 , sgemm_f32zero2
|
||||
|
||||
mov BO, BC
|
||||
asrs L , K1, #3 // L = L / 8
|
||||
|
@ -1434,3 +1447,5 @@ sgemm_kernel_L999:
|
|||
|
||||
EPILOGUE
|
||||
|
||||
sgemm_f32zero2:
|
||||
.word 0x00000000
|
||||
|
|
|
@ -90,7 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x2
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9, s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -156,7 +156,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9, s8
|
||||
vmov.f32 s12, s8
|
||||
vmov.f32 s13, s8
|
||||
|
@ -211,7 +211,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s12, s8
|
||||
|
||||
.endm
|
||||
|
@ -259,7 +259,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9, s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -309,7 +309,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
vmov.f32 s9 , s8
|
||||
|
||||
.endm
|
||||
|
@ -348,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
vldr.f32 s8 , =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -88,7 +88,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -322,7 +321,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -405,7 +403,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s24, s16
|
||||
vmov.f32 s28, s16
|
||||
|
@ -464,7 +461,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -538,7 +534,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -593,7 +588,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s20, s16
|
||||
|
||||
.endm
|
||||
|
@ -636,7 +630,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -690,7 +683,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
vmov.f32 s17, s16
|
||||
|
||||
.endm
|
||||
|
@ -729,13 +721,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
.macro KERNEL1x1_SUB
|
||||
|
||||
|
@ -955,9 +940,24 @@ _L4_M4_32:
|
|||
|
||||
b _L4_M4_44
|
||||
|
||||
/* Note on loading zero into a fp register
|
||||
* vsub (x - x) doesn't work since it yields NaN, not zero, when x is NaN or infinity
|
||||
* vmov.Fn doesn't work with 0 since 0.0 cannot be encoded as a VFP immediate
|
||||
* vmov.In and veor are NEON-only instructions
|
||||
* vldr , =0 doesn't work since the function is larger than 2KB
|
||||
* and the assembler is not able to insert a constant pool inside
|
||||
* the function body.
|
||||
*
|
||||
* Therefore, the best way I've found is to manually create multiple
|
||||
* copies of the zero constant and `vldr` from different ones depending
|
||||
* on where the instruction is.
|
||||
*/
|
||||
strmm_f32zero:
|
||||
.word 0x00000000
|
||||
|
||||
_L4_M4_40:
|
||||
|
||||
vldr.f32 s16 , strmm_f32zero
|
||||
INIT4x4
|
||||
|
||||
|
||||
|
@ -1014,6 +1014,7 @@ _L4_M2_BEGIN:
|
|||
|
||||
_L4_M2_20:
|
||||
|
||||
vldr.f32 s16 , strmm_f32zero
|
||||
INIT2x4
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || \
|
||||
|
@ -1112,6 +1113,7 @@ _L4_M1_BEGIN:
|
|||
|
||||
_L4_M1_20:
|
||||
|
||||
vldr.f32 s16 , strmm_f32zero3
|
||||
INIT1x4
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || \
|
||||
|
@ -1252,9 +1254,14 @@ _L2_M4_BEGIN:
|
|||
ldr I, M
|
||||
asrs I, I, #2 // I = I / 4
|
||||
ble _L2_M2_BEGIN
|
||||
b _L2_M4_20
|
||||
|
||||
strmm_f32zero3:
|
||||
.word 0x00000000
|
||||
|
||||
_L2_M4_20:
|
||||
|
||||
vldr.f32 s16 , strmm_f32zero3
|
||||
INIT4x2
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || \
|
||||
|
@ -1360,6 +1367,7 @@ _L2_M2_BEGIN:
|
|||
|
||||
_L2_M2_20:
|
||||
|
||||
vldr.f32 s16 , strmm_f32zero3
|
||||
INIT2x2
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || \
|
||||
|
@ -1458,6 +1466,7 @@ _L2_M1_BEGIN:
|
|||
|
||||
_L2_M1_20:
|
||||
|
||||
vldr.f32 s16 , strmm_f32zero4
|
||||
INIT1x2
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || \
|
||||
|
@ -1591,9 +1600,14 @@ _L1_M4_BEGIN:
|
|||
ldr I, M
|
||||
asrs I, I, #2 // I = I / 4
|
||||
ble _L1_M2_BEGIN
|
||||
b _L1_M4_20
|
||||
|
||||
strmm_f32zero4:
|
||||
.word 0x00000000
|
||||
|
||||
_L1_M4_20:
|
||||
|
||||
vldr.f32 s16 , strmm_f32zero4
|
||||
INIT4x1
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || \
|
||||
|
@ -1700,6 +1714,7 @@ _L1_M2_BEGIN:
|
|||
|
||||
_L1_M2_20:
|
||||
|
||||
vldr.f32 s16 , strmm_f32zero2
|
||||
INIT2x1
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || \
|
||||
|
@ -1798,7 +1813,7 @@ _L1_M1_BEGIN:
|
|||
|
||||
_L1_M1_20:
|
||||
|
||||
INIT1x1
|
||||
vldr.f32 s16 , strmm_f32zero2
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || \
|
||||
(!defined(LEFT) && !defined(TRANSA))
|
||||
|
@ -1882,3 +1897,5 @@ _L999:
|
|||
|
||||
EPILOGUE
|
||||
|
||||
strmm_f32zero2:
|
||||
.word 0x00000000
|
||||
|
|
|
@ -190,10 +190,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
mov Y, OLD_Y
|
||||
ldr INC_Y, OLD_INC_Y
|
||||
|
||||
vsub.f64 d0 , d0 , d0
|
||||
vsub.f64 d1 , d1 , d1
|
||||
vsub.f64 d2 , d2 , d2
|
||||
vsub.f64 d3 , d3 , d3
|
||||
vldr.f64 d0 , =0
|
||||
vldr.f64 d1 , =0
|
||||
vldr.f64 d2 , =0
|
||||
vldr.f64 d3 , =0
|
||||
|
||||
cmp N, #0
|
||||
ble zdot_kernel_L999
|
||||
|
|
|
@ -131,7 +131,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -383,7 +383,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
vmov.f64 d12, d8
|
||||
vmov.f64 d13, d8
|
||||
|
@ -557,7 +557,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -724,7 +724,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
|
||||
.endm
|
||||
|
|
|
@ -147,7 +147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -404,7 +404,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d21, d16
|
||||
|
@ -586,7 +586,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -766,7 +766,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d24, d16
|
||||
vmov.f64 d25, d16
|
||||
|
|
|
@ -117,7 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro INIT_F4
|
||||
|
||||
pld [ YO, #Y_PRE ]
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -222,7 +222,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
|
||||
.endm
|
||||
|
@ -269,7 +269,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S4
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -386,7 +386,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
|
||||
.endm
|
||||
|
|
|
@ -117,10 +117,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F2
|
||||
|
||||
vsub.f64 d12, d12, d12
|
||||
vsub.f64 d13, d13, d13
|
||||
vsub.f64 d14, d14, d14
|
||||
vsub.f64 d15, d15, d15
|
||||
vldr.f64 d12, =0
|
||||
vldr.f64 d13, =0
|
||||
vldr.f64 d14, =0
|
||||
vldr.f64 d15, =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -173,8 +173,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
|
||||
vsub.f64 d12, d12, d12
|
||||
vsub.f64 d13, d13, d13
|
||||
vldr.f64 d12, =0
|
||||
vldr.f64 d13, =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -216,10 +216,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S2
|
||||
|
||||
vsub.f64 d12, d12, d12
|
||||
vsub.f64 d13, d13, d13
|
||||
vsub.f64 d14, d14, d14
|
||||
vsub.f64 d15, d15, d15
|
||||
vldr.f64 d12, =0
|
||||
vldr.f64 d13, =0
|
||||
vldr.f64 d14, =0
|
||||
vldr.f64 d15, =0
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S1
|
||||
|
||||
vsub.f64 d12, d12, d12
|
||||
vsub.f64 d13, d13, d13
|
||||
vldr.f64 d12, =0
|
||||
vldr.f64 d13, =0
|
||||
|
||||
.endm
|
||||
|
||||
|
|
|
@ -140,7 +140,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -356,10 +356,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
fldd d0, ALPHA_R
|
||||
fldd d1, ALPHA_I
|
||||
|
||||
vsub.f64 d4, d4 , d4
|
||||
vsub.f64 d5, d5 , d5
|
||||
vsub.f64 d6, d6 , d6
|
||||
vsub.f64 d7, d7 , d7
|
||||
vldr.f64 d4, =0
|
||||
vldr.f64 d5, =0
|
||||
vldr.f64 d6, =0
|
||||
vldr.f64 d7, =0
|
||||
|
||||
FMAC_R1 d4 , d0 , d8
|
||||
FMAC_I1 d5 , d0 , d9
|
||||
|
@ -373,10 +373,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
fstmiad CO1, { d4 - d7 }
|
||||
|
||||
vsub.f64 d4, d4 , d4
|
||||
vsub.f64 d5, d5 , d5
|
||||
vsub.f64 d6, d6 , d6
|
||||
vsub.f64 d7, d7 , d7
|
||||
vldr.f64 d4, =0
|
||||
vldr.f64 d5, =0
|
||||
vldr.f64 d6, =0
|
||||
vldr.f64 d7, =0
|
||||
|
||||
FMAC_R1 d4 , d0 , d12
|
||||
FMAC_I1 d5 , d0 , d13
|
||||
|
@ -398,7 +398,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
vmov.f64 d12, d8
|
||||
vmov.f64 d13, d8
|
||||
|
@ -545,8 +545,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
fldd d0, ALPHA_R
|
||||
fldd d1, ALPHA_I
|
||||
|
||||
vsub.f64 d4, d4 , d4
|
||||
vsub.f64 d5, d5 , d5
|
||||
vldr.f64 d4, =0
|
||||
vldr.f64 d5, =0
|
||||
|
||||
FMAC_R1 d4 , d0 , d8
|
||||
FMAC_I1 d5 , d0 , d9
|
||||
|
@ -555,8 +555,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
fstmiad CO1, { d4 - d5 }
|
||||
|
||||
vsub.f64 d4, d4 , d4
|
||||
vsub.f64 d5, d5 , d5
|
||||
vldr.f64 d4, =0
|
||||
vldr.f64 d5, =0
|
||||
|
||||
FMAC_R1 d4 , d0 , d12
|
||||
FMAC_I1 d5 , d0 , d13
|
||||
|
@ -574,7 +574,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -718,10 +718,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
fldd d0, ALPHA_R
|
||||
fldd d1, ALPHA_I
|
||||
|
||||
vsub.f64 d4, d4 , d4
|
||||
vsub.f64 d5, d5 , d5
|
||||
vsub.f64 d6, d6 , d6
|
||||
vsub.f64 d7, d7 , d7
|
||||
vldr.f64 d4, =0
|
||||
vldr.f64 d5, =0
|
||||
vldr.f64 d6, =0
|
||||
vldr.f64 d7, =0
|
||||
|
||||
FMAC_R1 d4 , d0 , d8
|
||||
FMAC_I1 d5 , d0 , d9
|
||||
|
@ -744,7 +744,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
vldr.f64 d8 , =0
|
||||
vmov.f64 d9 , d8
|
||||
|
||||
.endm
|
||||
|
@ -850,8 +850,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
fldd d0, ALPHA_R
|
||||
fldd d1, ALPHA_I
|
||||
|
||||
vsub.f64 d4, d4 , d4
|
||||
vsub.f64 d5, d5 , d5
|
||||
vldr.f64 d4, =0
|
||||
vldr.f64 d5, =0
|
||||
|
||||
FMAC_R1 d4 , d0 , d8
|
||||
FMAC_I1 d5 , d0 , d9
|
||||
|
|
|
@ -134,7 +134,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -388,7 +388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d21, d16
|
||||
|
@ -566,7 +566,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -743,7 +743,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
vldr.f64 d16 , =0
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d24, d16
|
||||
vmov.f64 d25, d16
|
||||
|
|
|
@ -797,7 +797,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alphar,FLOAT alphai,FLOAT* b
|
|||
res1 = res1-load0*load3;
|
||||
#endif
|
||||
#if defined(RN) || defined(RT) || defined(CN) || defined(CT)
|
||||
load0 = ptrba[2*0+0];
|
||||
load0 = ptrba[2*0+0];
|
||||
load1 = ptrbb[2*0+0];
|
||||
res0 = res0+load0*load1;
|
||||
load2 = ptrba[2*0+1];
|
||||
|
|
Loading…
Reference in New Issue