Optimizations for TRMM
This commit is contained in:
parent
125610d23b
commit
1b10ff129a
|
@@ -135,6 +135,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
START_RPCC();
|
||||
|
@@ -199,6 +201,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
START_RPCC();
|
||||
|
@@ -288,6 +292,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
START_RPCC();
|
||||
|
@@ -352,6 +358,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
START_RPCC();
|
||||
|
|
|
@@ -119,6 +119,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = 0; jjs < ls - js; jjs += min_jj){
|
||||
min_jj = ls - js - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
#ifndef TRANSA
|
||||
|
@@ -137,6 +139,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = 0; jjs < min_l; jjs += min_jj){
|
||||
min_jj = min_l - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
#ifndef TRANSA
|
||||
|
@@ -188,6 +192,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
#ifndef TRANSA
|
||||
|
@@ -239,6 +245,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = 0; jjs < min_l; jjs += min_jj){
|
||||
min_jj = min_l - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
#ifndef TRANSA
|
||||
|
@@ -258,6 +266,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = 0; jjs < js - ls - min_l; jjs += min_jj){
|
||||
min_jj = js - ls - min_l - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
#ifndef TRANSA
|
||||
|
@@ -313,6 +323,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
#ifndef TRANSA
|
||||
|
|
Loading…
Reference in New Issue