Merge pull request #4479 from XiWeiGu/loongarch-opt-axpby

Loongarch opt axpby
This commit is contained in:
Martin Kroeker
2024-02-04 19:50:28 +01:00
committed by GitHub
9 changed files with 2415 additions and 5 deletions

View File

@@ -58,6 +58,8 @@ ZAXPYKERNEL = caxpy_lsx.S
SAXPBYKERNEL = axpby_lsx.S
DAXPBYKERNEL = axpby_lsx.S
CAXPBYKERNEL = caxpby_lsx.S
ZAXPBYKERNEL = caxpby_lsx.S
SSUMKERNEL = sum_lsx.S
DSUMKERNEL = sum_lsx.S

View File

@@ -58,6 +58,8 @@ ZAXPYKERNEL = caxpy_lasx.S
SAXPBYKERNEL = axpby_lasx.S
DAXPBYKERNEL = axpby_lasx.S
CAXPBYKERNEL = caxpby_lasx.S
ZAXPBYKERNEL = caxpby_lasx.S
SSUMKERNEL = sum_lasx.S
DSUMKERNEL = sum_lasx.S

View File

@@ -57,10 +57,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PROLOGUE
bge $r0, N, .L999
li.d TEMP, 1
movgr2fr.d a1, $r0
ffint.s.l a1, a1
slli.d TEMP, TEMP, BASE_SHIFT
slli.d INCX, INCX, BASE_SHIFT
slli.d INCY, INCY, BASE_SHIFT
MTG t1, ALPHA
@@ -75,6 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvreplgr2vr.w VXB, t2
xvreplgr2vr.w VXZ, t3
#endif
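// If (incx & incy) == 0, do one by one. This bitwise test catches incx == 0 or
// incy == 0, and also any nonzero strides that share no set bit.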
and TEMP, INCX, INCY
or I, N, N
beqz TEMP, .L998
li.d TEMP, 1
slli.d TEMP, TEMP, BASE_SHIFT
srai.d I, N, 3
bne INCX, TEMP, .L20
bne INCY, TEMP, .L12 // INCX==1 and INCY!=1

View File

@@ -57,10 +57,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PROLOGUE
bge $r0, N, .L999
li.d TEMP, 1
movgr2fr.d a1, $r0
ffint.s.l a1, a1
slli.d TEMP, TEMP, BASE_SHIFT
slli.d INCX, INCX, BASE_SHIFT
slli.d INCY, INCY, BASE_SHIFT
MTG t1, ALPHA
@@ -75,6 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vreplgr2vr.w VXB, t2
vreplgr2vr.w VXZ, t3
#endif
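// If (incx & incy) == 0, do one by one. This bitwise test catches incx == 0 or
// incy == 0, and also any nonzero strides that share no set bit.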
and TEMP, INCX, INCY
or I, N, N
beqz TEMP, .L998
li.d TEMP, 1
slli.d TEMP, TEMP, BASE_SHIFT
srai.d I, N, 3
bne INCX, TEMP, .L20
bne INCY, TEMP, .L12 // INCX==1 and INCY!=1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff