Modified the alignment size. Added an additional prefetch instruction because the cache line is 32 bytes on the Loongson 3A.

This commit is contained in:
Xianyi Zhang 2011-01-27 23:07:06 +08:00
parent c0b5992fab
commit ebe2da8474
1 changed file with 12 additions and 9 deletions

View File

@ -72,7 +72,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#define PREFETCH_DISTANCE 48
#define PREFETCH_DISTANCE 48
#define N $4
@ -153,7 +153,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
blez I, .L13
NOP
.align 3
.align 5
.L12:
PREFETCHD(PREFETCH_DISTANCE*SIZE(X))
@ -180,6 +180,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ST t3, 2 * SIZE(Y)
ST t4, 3 * SIZE(Y)
PREFETCHD((PREFETCH_DISTANCE+4)*SIZE(X))
PREFETCHD((PREFETCH_DISTANCE+4)*SIZE(Y))
MADD t1, b5, ALPHA, a5
LD a5, 12 * SIZE(X)
LD b5, 12 * SIZE(Y)
@ -206,7 +209,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
bgtz I, .L12
daddiu X, X, 8 * SIZE
.align 3
.align 5
.L13:
MADD t1, b1, ALPHA, a1
@ -230,7 +233,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
daddiu X, X, 8 * SIZE
daddiu Y, Y, 8 * SIZE
.align 3
.align 5
.L15:
andi I, N, 7
@ -260,7 +263,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
j $31
NOP
.align 3
.align 5
.L20:
dsra I, N, 3
@ -304,7 +307,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
blez I, .L23
NOP
.align 3
.align 5
.L22:
MADD t1, b1, ALPHA, a1
@ -379,7 +382,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
bgtz I, .L22
daddu YY, YY, INCY
.align 3
.align 5
.L23:
MADD t1, b1, ALPHA, a1
@ -411,7 +414,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
daddu YY, YY, INCY
ST t4, 0 * SIZE(YY)
daddu YY, YY, INCY
.align 3
.align 5
.L25:
andi I, N, 7
@ -432,7 +435,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
bgtz I, .L26
daddu Y, Y, INCY
.align 3
.align 5
.L999:
#ifndef __64BIT__