Modified the alignment size. Added additional prefetch instructions because the cache line is 32 bytes on the Loongson 3A.
This commit is contained in:
parent
c0b5992fab
commit
ebe2da8474
|
@ -72,7 +72,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
#define PREFETCH_DISTANCE 48
|
#define PREFETCH_DISTANCE 48
|
||||||
|
|
||||||
#define N $4
|
#define N $4
|
||||||
|
|
||||||
|
@ -153,7 +153,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
blez I, .L13
|
blez I, .L13
|
||||||
NOP
|
NOP
|
||||||
.align 3
|
.align 5
|
||||||
|
|
||||||
.L12:
|
.L12:
|
||||||
PREFETCHD(PREFETCH_DISTANCE*SIZE(X))
|
PREFETCHD(PREFETCH_DISTANCE*SIZE(X))
|
||||||
|
@ -180,6 +180,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ST t3, 2 * SIZE(Y)
|
ST t3, 2 * SIZE(Y)
|
||||||
ST t4, 3 * SIZE(Y)
|
ST t4, 3 * SIZE(Y)
|
||||||
|
|
||||||
|
PREFETCHD((PREFETCH_DISTANCE+4)*SIZE(X))
|
||||||
|
PREFETCHD((PREFETCH_DISTANCE+4)*SIZE(Y))
|
||||||
|
|
||||||
MADD t1, b5, ALPHA, a5
|
MADD t1, b5, ALPHA, a5
|
||||||
LD a5, 12 * SIZE(X)
|
LD a5, 12 * SIZE(X)
|
||||||
LD b5, 12 * SIZE(Y)
|
LD b5, 12 * SIZE(Y)
|
||||||
|
@ -206,7 +209,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
bgtz I, .L12
|
bgtz I, .L12
|
||||||
daddiu X, X, 8 * SIZE
|
daddiu X, X, 8 * SIZE
|
||||||
.align 3
|
.align 5
|
||||||
|
|
||||||
.L13:
|
.L13:
|
||||||
MADD t1, b1, ALPHA, a1
|
MADD t1, b1, ALPHA, a1
|
||||||
|
@ -230,7 +233,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
daddiu X, X, 8 * SIZE
|
daddiu X, X, 8 * SIZE
|
||||||
daddiu Y, Y, 8 * SIZE
|
daddiu Y, Y, 8 * SIZE
|
||||||
.align 3
|
.align 5
|
||||||
|
|
||||||
.L15:
|
.L15:
|
||||||
andi I, N, 7
|
andi I, N, 7
|
||||||
|
@ -260,7 +263,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
j $31
|
j $31
|
||||||
NOP
|
NOP
|
||||||
.align 3
|
.align 5
|
||||||
|
|
||||||
.L20:
|
.L20:
|
||||||
dsra I, N, 3
|
dsra I, N, 3
|
||||||
|
@ -304,7 +307,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
blez I, .L23
|
blez I, .L23
|
||||||
NOP
|
NOP
|
||||||
.align 3
|
.align 5
|
||||||
|
|
||||||
.L22:
|
.L22:
|
||||||
MADD t1, b1, ALPHA, a1
|
MADD t1, b1, ALPHA, a1
|
||||||
|
@ -379,7 +382,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
bgtz I, .L22
|
bgtz I, .L22
|
||||||
daddu YY, YY, INCY
|
daddu YY, YY, INCY
|
||||||
.align 3
|
.align 5
|
||||||
|
|
||||||
.L23:
|
.L23:
|
||||||
MADD t1, b1, ALPHA, a1
|
MADD t1, b1, ALPHA, a1
|
||||||
|
@ -411,7 +414,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
daddu YY, YY, INCY
|
daddu YY, YY, INCY
|
||||||
ST t4, 0 * SIZE(YY)
|
ST t4, 0 * SIZE(YY)
|
||||||
daddu YY, YY, INCY
|
daddu YY, YY, INCY
|
||||||
.align 3
|
.align 5
|
||||||
|
|
||||||
.L25:
|
.L25:
|
||||||
andi I, N, 7
|
andi I, N, 7
|
||||||
|
@ -432,7 +435,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
bgtz I, .L26
|
bgtz I, .L26
|
||||||
daddu Y, Y, INCY
|
daddu Y, Y, INCY
|
||||||
.align 3
|
.align 5
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
#ifndef __64BIT__
|
#ifndef __64BIT__
|
||||||
|
|
Loading…
Reference in New Issue