From 47b639cc9b4ff900f7b83751af9d1c4ff9dea3c1 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Mon, 7 Dec 2020 10:04:00 +0800 Subject: [PATCH] Fix failed sswap and dswap case by using msa optimization The swap test case will call sswap_msa.c and dswap_msa.c files in MIPS environment. When inc_x or inc_y is equal to zero, the calculation result of the two functions will be wrong. This patch adds the processing of inc_x or inc_y equal to zero, and the swap test case has passed. --- kernel/mips/dswap_msa.c | 30 ++++++++++++++++++++++++++++-- kernel/mips/sswap_msa.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/kernel/mips/dswap_msa.c b/kernel/mips/dswap_msa.c index 7b1f02477..67e97f710 100644 --- a/kernel/mips/dswap_msa.c +++ b/kernel/mips/dswap_msa.c @@ -184,7 +184,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } - else + else if ((inc_x != 0) && (inc_y != 0)) { for (i = (n >> 3); i--;) { @@ -248,6 +248,32 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } - + else + { + if (inc_x == inc_y) + { + if (n & 1) + { + x0 = *srcx; + *srcx = *srcy; + *srcy = x0; + } + else + return (0); + } + else + { + BLASLONG ix = 0, iy = 0; + while (i < n) + { + x0 = srcx[ix]; + srcx[ix] = srcy[iy]; + srcy[iy] = x0; + ix += inc_x; + iy += inc_y; + i++; + } + } + } return (0); } diff --git a/kernel/mips/sswap_msa.c b/kernel/mips/sswap_msa.c index 46fa8aa87..d412285b0 100644 --- a/kernel/mips/sswap_msa.c +++ b/kernel/mips/sswap_msa.c @@ -198,7 +198,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } - else + else if ((inc_x != 0) && (inc_y != 0)) { for (i = (n >> 3); i--;) { @@ -262,6 +262,33 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } + else + { + if (inc_x == inc_y) + { + if (n & 1) + { + x0 = *srcx; + *srcx = *srcy; + *srcy = x0; + } + else + return (0); + } + else + { + BLASLONG ix = 0, iy = 0; + while (i < n) + { 
+ x0 = srcx[ix]; + srcx[ix] = srcy[iy]; + srcy[iy] = x0; + ix += inc_x; + iy += inc_y; + i++; + } + } + } return (0); }