updated cgemv and zgemv kernels for armv6
This commit is contained in:
parent
c65357c566
commit
eaa63165df
|
@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define I r12
|
||||
|
||||
// Frame slots used as an in-memory zero constant. FP_ZERO and FP_ZERO_0 name
// the same word at [fp, #-228]; FP_ZERO_1 is the next word, 4 bytes higher.
// Code shown later in this listing stores r12 = 0 to FP_ZERO and FP_ZERO_1.
#define FP_ZERO [fp, #-228]
|
||||
#define FP_ZERO_0 [fp, #-228]
|
||||
#define FP_ZERO_1 [fp, #-224]
|
||||
|
||||
#define ALPHA_I [fp, #-236]
|
||||
#define ALPHA_R [fp, #-244]
|
||||
|
||||
|
@ -117,7 +121,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro INIT_F4
|
||||
|
||||
pld [ YO, #Y_PRE ]
|
||||
vsub.f32 s8 , s8 , s8
|
||||
flds s8 , FP_ZERO
|
||||
vmov.f32 s9 , s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -220,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_F1: loads s8 from the FP_ZERO frame slot and copies it into s9.
.macro INIT_F1
|
||||
|
||||
// NOTE(review): this result is overwritten by the flds below. x - x gives NaN
// (not 0) when x is NaN or infinite; presumably this is the line the commit
// removes in favour of the load -- confirm against the real file.
vsub.f32 s8 , s8 , s8
|
||||
// FP_ZERO is [fp, #-228]; a 0 word is stored there elsewhere in this listing.
flds s8 , FP_ZERO
|
||||
vmov.f32 s9 , s8
|
||||
|
||||
.endm
|
||||
|
@ -267,7 +271,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S4
|
||||
|
||||
vsub.f32 s8 , s8 , s8
|
||||
flds s8 , FP_ZERO
|
||||
vmov.f32 s9 , s8
|
||||
vmov.f32 s10, s8
|
||||
vmov.f32 s11, s8
|
||||
|
@ -384,7 +388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_S1: loads s8 from the FP_ZERO frame slot and copies it into s9.
.macro INIT_S1
|
||||
|
||||
// NOTE(review): this result is overwritten by the flds below. x - x gives NaN
// (not 0) when x is NaN or infinite; presumably this is the line the commit
// removes in favour of the load -- confirm against the real file.
vsub.f32 s8 , s8 , s8
|
||||
// FP_ZERO is [fp, #-228]; a 0 word is stored there elsewhere in this listing.
flds s8 , FP_ZERO
|
||||
vmov.f32 s9 , s8
|
||||
|
||||
.endm
|
||||
|
@ -448,6 +452,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vstm r12, { s8 - s15 } // store floating point registers
|
||||
#endif
|
||||
|
||||
movs r12, #0
|
||||
str r12, FP_ZERO
|
||||
str r12, FP_ZERO_1
|
||||
|
||||
cmp OLD_M, #0
|
||||
ble cgemvn_kernel_L999
|
||||
|
||||
|
|
|
@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define I r12
|
||||
|
||||
// Frame slots used as an in-memory zero constant. FP_ZERO and FP_ZERO_0 name
// the same word at [fp, #-228]; FP_ZERO_1 is the next word, 4 bytes higher.
// Code shown later in this listing stores r12 = 0 to FP_ZERO and FP_ZERO_1.
#define FP_ZERO [fp, #-228]
|
||||
#define FP_ZERO_0 [fp, #-228]
|
||||
#define FP_ZERO_1 [fp, #-224]
|
||||
|
||||
#define N [fp, #-252 ]
|
||||
#define A [fp, #-256 ]
|
||||
|
||||
|
@ -116,10 +120,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_F2: loads s12 from the FP_ZERO frame slot and copies it into s13-s15.
.macro INIT_F2
|
||||
|
||||
// NOTE(review): the four vsub results are overwritten by the flds/vmov
// sequence below. x - x gives NaN (not 0) when x is NaN or infinite;
// presumably these are the lines the commit removes -- confirm against the
// real file.
vsub.f32 s12, s12, s12
|
||||
vsub.f32 s13, s13, s13
|
||||
vsub.f32 s14, s14, s14
|
||||
vsub.f32 s15, s15, s15
|
||||
// FP_ZERO is [fp, #-228]; a 0 word is stored there elsewhere in this listing.
flds s12, FP_ZERO
|
||||
vmov.f32 s13, s12
|
||||
vmov.f32 s14, s12
|
||||
vmov.f32 s15, s12
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -172,8 +176,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_F1: loads s12 from the FP_ZERO frame slot and copies it into s13.
.macro INIT_F1
|
||||
|
||||
// NOTE(review): both vsub results are overwritten by the flds/vmov below.
// x - x gives NaN (not 0) when x is NaN or infinite; presumably these are the
// lines the commit removes -- confirm against the real file.
vsub.f32 s12, s12, s12
|
||||
vsub.f32 s13, s13, s13
|
||||
// FP_ZERO is [fp, #-228]; a 0 word is stored there elsewhere in this listing.
flds s12, FP_ZERO
|
||||
vmov.f32 s13, s12
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -215,10 +219,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_S2: loads s12 from the FP_ZERO frame slot and copies it into s13-s15.
.macro INIT_S2
|
||||
|
||||
// NOTE(review): the four vsub results are overwritten by the flds/vmov
// sequence below. x - x gives NaN (not 0) when x is NaN or infinite;
// presumably these are the lines the commit removes -- confirm against the
// real file.
vsub.f32 s12, s12, s12
|
||||
vsub.f32 s13, s13, s13
|
||||
vsub.f32 s14, s14, s14
|
||||
vsub.f32 s15, s15, s15
|
||||
// FP_ZERO is [fp, #-228]; a 0 word is stored there elsewhere in this listing.
flds s12, FP_ZERO
|
||||
vmov.f32 s13, s12
|
||||
vmov.f32 s14, s12
|
||||
vmov.f32 s15, s12
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -281,8 +285,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_S1: loads s12 from the FP_ZERO frame slot and copies it into s13.
.macro INIT_S1
|
||||
|
||||
// NOTE(review): both vsub results are overwritten by the flds/vmov below.
// x - x gives NaN (not 0) when x is NaN or infinite; presumably these are the
// lines the commit removes -- confirm against the real file.
vsub.f32 s12, s12, s12
|
||||
vsub.f32 s13, s13, s13
|
||||
// FP_ZERO is [fp, #-228]; a 0 word is stored there elsewhere in this listing.
flds s12, FP_ZERO
|
||||
vmov.f32 s13, s12
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -345,6 +349,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vstm r12, { s8 - s15 } // store floating point registers
|
||||
#endif
|
||||
|
||||
movs r12, #0
|
||||
str r12, FP_ZERO
|
||||
str r12, FP_ZERO_1
|
||||
|
||||
cmp M, #0
|
||||
ble cgemvt_kernel_L999
|
||||
|
||||
|
|
|
@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define I r12
|
||||
|
||||
// Frame slots used as an in-memory zero constant. FP_ZERO and FP_ZERO_0 name
// the same word at [fp, #-228]; FP_ZERO_1 is the next word, 4 bytes higher.
// Code shown later in this listing stores r12 = 0 to FP_ZERO and FP_ZERO_1;
// the fldd loads in this section read the doubleword starting at FP_ZERO.
#define FP_ZERO [fp, #-228]
|
||||
#define FP_ZERO_0 [fp, #-228]
|
||||
#define FP_ZERO_1 [fp, #-224]
|
||||
|
||||
|
||||
#define ALPHA_I [fp, #-236]
|
||||
#define ALPHA_R [fp, #-244]
|
||||
|
||||
|
@ -117,7 +122,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro INIT_F4
|
||||
|
||||
pld [ YO, #Y_PRE ]
|
||||
vsub.f64 d8 , d8 , d8
|
||||
fldd d8, FP_ZERO
|
||||
vmov.f64 d9 , d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -222,7 +227,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_F1: loads d8 from the FP_ZERO frame slot and copies it into d9.
.macro INIT_F1
|
||||
|
||||
// NOTE(review): this result is overwritten by the fldd below. x - x gives NaN
// (not 0) when x is NaN or infinite; presumably this is the line the commit
// removes in favour of the load -- confirm against the real file.
vsub.f64 d8 , d8 , d8
|
||||
// Doubleword load starting at FP_ZERO ([fp, #-228]), so it also covers the
// FP_ZERO_1 word; both words are stored as 0 elsewhere in this listing.
fldd d8, FP_ZERO
|
||||
vmov.f64 d9 , d8
|
||||
|
||||
.endm
|
||||
|
@ -269,7 +274,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_S4
|
||||
|
||||
vsub.f64 d8 , d8 , d8
|
||||
fldd d8, FP_ZERO
|
||||
vmov.f64 d9 , d8
|
||||
vmov.f64 d10, d8
|
||||
vmov.f64 d11, d8
|
||||
|
@ -386,7 +391,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_S1: loads d8 from the FP_ZERO frame slot and copies it into d9.
.macro INIT_S1
|
||||
|
||||
// NOTE(review): this result is overwritten by the fldd below. x - x gives NaN
// (not 0) when x is NaN or infinite; presumably this is the line the commit
// removes in favour of the load -- confirm against the real file.
vsub.f64 d8 , d8 , d8
|
||||
// Doubleword load starting at FP_ZERO ([fp, #-228]), so it also covers the
// FP_ZERO_1 word; both words are stored as 0 elsewhere in this listing.
fldd d8, FP_ZERO
|
||||
vmov.f64 d9 , d8
|
||||
|
||||
.endm
|
||||
|
@ -450,6 +455,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vstm r12, { s8 - s15 } // store floating point registers
|
||||
#endif
|
||||
|
||||
movs r12, #0
|
||||
str r12, FP_ZERO
|
||||
str r12, FP_ZERO_1
|
||||
|
||||
cmp OLD_M, #0
|
||||
ble zgemvn_kernel_L999
|
||||
|
||||
|
|
|
@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define I r12
|
||||
|
||||
// Frame slots used as an in-memory zero constant. FP_ZERO and FP_ZERO_0 name
// the same word at [fp, #-228]; FP_ZERO_1 is the next word, 4 bytes higher.
// Code shown later in this listing stores r12 = 0 to FP_ZERO and FP_ZERO_1;
// the fldd loads in this section read the doubleword starting at FP_ZERO.
#define FP_ZERO [fp, #-228]
|
||||
#define FP_ZERO_0 [fp, #-228]
|
||||
#define FP_ZERO_1 [fp, #-224]
|
||||
|
||||
#define N [fp, #-252 ]
|
||||
#define A [fp, #-256 ]
|
||||
|
||||
|
@ -117,10 +121,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_F2: loads d12 from the FP_ZERO frame slot and copies it into d13-d15.
.macro INIT_F2
|
||||
|
||||
// NOTE(review): the four vsub results are overwritten by the fldd/vmov
// sequence below. x - x gives NaN (not 0) when x is NaN or infinite;
// presumably these are the lines the commit removes -- confirm against the
// real file.
vsub.f64 d12, d12, d12
|
||||
vsub.f64 d13, d13, d13
|
||||
vsub.f64 d14, d14, d14
|
||||
vsub.f64 d15, d15, d15
|
||||
// Doubleword load starting at FP_ZERO ([fp, #-228]), so it also covers the
// FP_ZERO_1 word; both words are stored as 0 elsewhere in this listing.
fldd d12, FP_ZERO
|
||||
vmov.f64 d13, d12
|
||||
vmov.f64 d14, d12
|
||||
vmov.f64 d15, d12
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -173,8 +177,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_F1: loads d12 from the FP_ZERO frame slot and copies it into d13.
.macro INIT_F1
|
||||
|
||||
// NOTE(review): both vsub results are overwritten by the fldd/vmov below.
// x - x gives NaN (not 0) when x is NaN or infinite; presumably these are the
// lines the commit removes -- confirm against the real file.
vsub.f64 d12, d12, d12
|
||||
vsub.f64 d13, d13, d13
|
||||
// Doubleword load starting at FP_ZERO ([fp, #-228]), so it also covers the
// FP_ZERO_1 word; both words are stored as 0 elsewhere in this listing.
fldd d12, FP_ZERO
|
||||
vmov.f64 d13, d12
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -216,10 +220,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_S2: loads d12 from the FP_ZERO frame slot and copies it into d13-d15.
.macro INIT_S2
|
||||
|
||||
// NOTE(review): the four vsub results are overwritten by the fldd/vmov
// sequence below. x - x gives NaN (not 0) when x is NaN or infinite;
// presumably these are the lines the commit removes -- confirm against the
// real file.
vsub.f64 d12, d12, d12
|
||||
vsub.f64 d13, d13, d13
|
||||
vsub.f64 d14, d14, d14
|
||||
vsub.f64 d15, d15, d15
|
||||
// Doubleword load starting at FP_ZERO ([fp, #-228]), so it also covers the
// FP_ZERO_1 word; both words are stored as 0 elsewhere in this listing.
fldd d12, FP_ZERO
|
||||
vmov.f64 d13, d12
|
||||
vmov.f64 d14, d12
|
||||
vmov.f64 d15, d12
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -282,8 +286,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT_S1: loads d12 from the FP_ZERO frame slot and copies it into d13.
.macro INIT_S1
|
||||
|
||||
// NOTE(review): both vsub results are overwritten by the fldd/vmov below.
// x - x gives NaN (not 0) when x is NaN or infinite; presumably these are the
// lines the commit removes -- confirm against the real file.
vsub.f64 d12, d12, d12
|
||||
vsub.f64 d13, d13, d13
|
||||
// Doubleword load starting at FP_ZERO ([fp, #-228]), so it also covers the
// FP_ZERO_1 word; both words are stored as 0 elsewhere in this listing.
fldd d12, FP_ZERO
|
||||
vmov.f64 d13, d12
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -346,6 +350,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vstm r12, { s8 - s15 } // store floating point registers
|
||||
#endif
|
||||
|
||||
movs r12, #0
|
||||
str r12, FP_ZERO
|
||||
str r12, FP_ZERO_1
|
||||
|
||||
cmp M, #0
|
||||
ble zgemvt_kernel_L999
|
||||
|
||||
|
|
Loading…
Reference in New Issue