updated cdot and zdot on arm
This commit is contained in:
parent
d2f84c9c8a
commit
aafd3ab60e
|
@ -185,14 +185,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
sub r4, fp, #128
|
||||
vstm r4, { s8 - s15} // store floating point registers
|
||||
|
||||
movs r4, #0 // clear floating point register
|
||||
vmov s0, r4
|
||||
vmov s1, s0
|
||||
vmov s2, s0
|
||||
vmov s3, s0
|
||||
|
||||
mov Y, OLD_Y
|
||||
ldr INC_Y, OLD_INC_Y
|
||||
|
||||
vsub.f32 s0 , s0 , s0
|
||||
vsub.f32 s1 , s1 , s1
|
||||
vsub.f32 s2 , s2 , s2
|
||||
vsub.f32 s3 , s3 , s3
|
||||
|
||||
cmp N, #0
|
||||
ble cdot_kernel_L999
|
||||
|
||||
|
|
|
@ -187,13 +187,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
sub r4, fp, #128
|
||||
vstm r4, { d8 - d15} // store floating point registers
|
||||
|
||||
movs r4, #0 // clear floating point register
|
||||
vmov s0, r4
|
||||
vcvt.f64.f32 d0, s0
|
||||
vcvt.f64.f32 d1, s0
|
||||
vcvt.f64.f32 d2, s0
|
||||
vcvt.f64.f32 d3, s0
|
||||
|
||||
mov Y, OLD_Y
|
||||
ldr INC_Y, OLD_INC_Y
|
||||
|
||||
vsub.f64 d0 , d0 , d0
|
||||
vsub.f64 d1 , d1 , d1
|
||||
vsub.f64 d2 , d2 , d2
|
||||
vsub.f64 d3 , d3 , d3
|
||||
|
||||
cmp N, #0
|
||||
ble zdot_kernel_L999
|
||||
|
|
Loading…
Reference in New Issue