optimized sgemm kernel for ARMV6
This commit is contained in:
parent
a9bd12da2c
commit
3d5e792c72
|
@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
/**************************************************************************************
|
/**************************************************************************************
|
||||||
* 2013/11/23 Saar
|
* 2013/11/27 Saar
|
||||||
* BLASTEST : OK
|
* BLASTEST : OK
|
||||||
* CTEST : OK
|
* CTEST : OK
|
||||||
* TEST : OK
|
* TEST : OK
|
||||||
|
@ -101,16 +101,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.macro KERNEL4x2_SUB
|
.macro KERNEL4x2_SUB
|
||||||
|
|
||||||
flds s4 , [ BO ]
|
flds s4 , [ BO ]
|
||||||
flds s5 , [ BO, #4 ]
|
|
||||||
|
|
||||||
flds s0 , [ AO ]
|
flds s0 , [ AO ]
|
||||||
flds s1 , [ AO, #4 ]
|
flds s1 , [ AO, #4 ]
|
||||||
flds s2 , [ AO, #8 ]
|
|
||||||
flds s3 , [ AO, #12 ]
|
|
||||||
|
|
||||||
fmacs s8 , s0, s4
|
fmacs s8 , s0, s4
|
||||||
|
flds s2 , [ AO, #8 ]
|
||||||
fmacs s9 , s1, s4
|
fmacs s9 , s1, s4
|
||||||
|
flds s3 , [ AO, #12 ]
|
||||||
fmacs s10 , s2, s4
|
fmacs s10 , s2, s4
|
||||||
|
flds s5 , [ BO, #4 ]
|
||||||
fmacs s11 , s3, s4
|
fmacs s11 , s3, s4
|
||||||
|
|
||||||
fmacs s12 , s0, s5
|
fmacs s12 , s0, s5
|
||||||
|
@ -469,13 +469,20 @@ sgemm_kernel_L2_M4_20:
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
sgemm_kernel_L2_M4_22:
|
sgemm_kernel_L2_M4_22:
|
||||||
|
|
||||||
|
pld [ AO, #A_PRE ]
|
||||||
|
pld [ BO, #B_PRE ]
|
||||||
KERNEL4x2_SUB
|
KERNEL4x2_SUB
|
||||||
KERNEL4x2_SUB
|
KERNEL4x2_SUB
|
||||||
|
pld [ AO, #A_PRE ]
|
||||||
KERNEL4x2_SUB
|
KERNEL4x2_SUB
|
||||||
KERNEL4x2_SUB
|
KERNEL4x2_SUB
|
||||||
|
|
||||||
|
pld [ AO, #A_PRE ]
|
||||||
|
pld [ BO, #B_PRE ]
|
||||||
KERNEL4x2_SUB
|
KERNEL4x2_SUB
|
||||||
KERNEL4x2_SUB
|
KERNEL4x2_SUB
|
||||||
|
pld [ AO, #A_PRE ]
|
||||||
KERNEL4x2_SUB
|
KERNEL4x2_SUB
|
||||||
KERNEL4x2_SUB
|
KERNEL4x2_SUB
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue