Small Matrix: reduce generic kernel source files

This commit is contained in:
Wangyang Guo 2021-08-13 03:17:38 +00:00
parent c17d6dacb2
commit 989e6bbdd3
18 changed files with 161 additions and 588 deletions

View File

@ -495,30 +495,30 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
endif ()
if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_NN)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
set(${float_char}GEMM_SMALL_K_B0_NN ../generic/zgemm_small_matrix_kernel_b0_nn.c)
set(${float_char}GEMM_SMALL_K_B0_NN ../generic/zgemm_small_matrix_kernel_nn.c)
else ()
set(${float_char}GEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_b0_nn.c)
set(${float_char}GEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
endif ()
if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_NT)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
set(${float_char}GEMM_SMALL_K_B0_NT ../generic/zgemm_small_matrix_kernel_b0_nt.c)
set(${float_char}GEMM_SMALL_K_B0_NT ../generic/zgemm_small_matrix_kernel_nt.c)
else ()
set(${float_char}GEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_b0_nt.c)
set(${float_char}GEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
endif ()
if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_TN)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
set(${float_char}GEMM_SMALL_K_B0_TN ../generic/zgemm_small_matrix_kernel_b0_tn.c)
set(${float_char}GEMM_SMALL_K_B0_TN ../generic/zgemm_small_matrix_kernel_tn.c)
else ()
set(${float_char}GEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_b0_tn.c)
set(${float_char}GEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
endif ()
if (NOT DEFINED ${float_char}GEMM_SMALL_K_B0_TT)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
set(${float_char}GEMM_SMALL_K_B0_TT ../generic/zgemm_small_matrix_kernel_b0_tt.c)
set(${float_char}GEMM_SMALL_K_B0_TT ../generic/zgemm_small_matrix_kernel_tt.c)
else ()
set(${float_char}GEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_b0_tt.c)
set(${float_char}GEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
endif ()
@ -541,32 +541,32 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "NN" "gemm_small_kernel_b0_nn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "NR" "gemm_small_kernel_b0_nr" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "RN" "gemm_small_kernel_b0_rn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "RR" "gemm_small_kernel_b0_rr" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "NT" "gemm_small_kernel_b0_nt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "NC" "gemm_small_kernel_b0_nc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "RT" "gemm_small_kernel_b0_rt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "RC" "gemm_small_kernel_b0_rc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "TN" "gemm_small_kernel_b0_tn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "TR" "gemm_small_kernel_b0_tr" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "CN" "gemm_small_kernel_b0_cn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "CR" "gemm_small_kernel_b0_cr" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "TT" "gemm_small_kernel_b0_tt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "TC" "gemm_small_kernel_b0_tc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "CT" "gemm_small_kernel_b0_ct" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "CC" "gemm_small_kernel_b0_cc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false ${float_type})
else ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_NT}" "" "gemm_small_kernel_tt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "" "gemm_small_kernel_b0_nn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "" "gemm_small_kernel_b0_nt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "" "gemm_small_kernel_b0_tn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "" "gemm_small_kernel_b0_tt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false ${float_type})
endif ()
endif ()

View File

@ -4334,32 +4334,32 @@ $(KDIR)dgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
ifndef DGEMM_SMALL_K_B0_NN
DGEMM_SMALL_K_B0_NN = ../generic/gemm_small_matrix_kernel_b0_nn.c
DGEMM_SMALL_K_B0_NN = ../generic/gemm_small_matrix_kernel_nn.c
endif
ifndef DGEMM_SMALL_K_B0_NT
DGEMM_SMALL_K_B0_NT = ../generic/gemm_small_matrix_kernel_b0_nt.c
DGEMM_SMALL_K_B0_NT = ../generic/gemm_small_matrix_kernel_nt.c
endif
ifndef DGEMM_SMALL_K_B0_TN
DGEMM_SMALL_K_B0_TN = ../generic/gemm_small_matrix_kernel_b0_tn.c
DGEMM_SMALL_K_B0_TN = ../generic/gemm_small_matrix_kernel_tn.c
endif
ifndef DGEMM_SMALL_K_B0_TT
DGEMM_SMALL_K_B0_TT = ../generic/gemm_small_matrix_kernel_b0_tt.c
DGEMM_SMALL_K_B0_TT = ../generic/gemm_small_matrix_kernel_tt.c
endif
$(KDIR)dgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_B0_NN)
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DB0 $< -o $@
$(KDIR)dgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_B0_NT)
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DB0 $< -o $@
$(KDIR)dgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_B0_TN)
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DB0 $< -o $@
$(KDIR)dgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_B0_TT)
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DB0 $< -o $@
ifndef SGEMM_SMALL_M_PERMIT
SGEMM_SMALL_M_PERMIT = ../generic/gemm_small_matrix_permit.c
@ -4397,32 +4397,32 @@ $(KDIR)sgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
ifndef SGEMM_SMALL_K_B0_NN
SGEMM_SMALL_K_B0_NN = ../generic/gemm_small_matrix_kernel_b0_nn.c
SGEMM_SMALL_K_B0_NN = ../generic/gemm_small_matrix_kernel_nn.c
endif
ifndef SGEMM_SMALL_K_B0_NT
SGEMM_SMALL_K_B0_NT = ../generic/gemm_small_matrix_kernel_b0_nt.c
SGEMM_SMALL_K_B0_NT = ../generic/gemm_small_matrix_kernel_nt.c
endif
ifndef SGEMM_SMALL_K_B0_TN
SGEMM_SMALL_K_B0_TN = ../generic/gemm_small_matrix_kernel_b0_tn.c
SGEMM_SMALL_K_B0_TN = ../generic/gemm_small_matrix_kernel_tn.c
endif
ifndef SGEMM_SMALL_K_B0_TT
SGEMM_SMALL_K_B0_TT = ../generic/gemm_small_matrix_kernel_b0_tt.c
SGEMM_SMALL_K_B0_TT = ../generic/gemm_small_matrix_kernel_tt.c
endif
$(KDIR)sgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_B0_NN)
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DB0 $< -o $@
$(KDIR)sgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_B0_NT)
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DB0 $< -o $@
$(KDIR)sgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_B0_TN)
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DB0 $< -o $@
$(KDIR)sgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_B0_TT)
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DB0 $< -o $@
ifndef CGEMM_SMALL_M_PERMIT
CGEMM_SMALL_M_PERMIT = ../generic/zgemm_small_matrix_permit.c
@ -4496,68 +4496,68 @@ $(KDIR)cgemm_small_kernel_cc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $@
ifndef CGEMM_SMALL_K_B0_NN
CGEMM_SMALL_K_B0_NN = ../generic/zgemm_small_matrix_kernel_b0_nn.c
CGEMM_SMALL_K_B0_NN = ../generic/zgemm_small_matrix_kernel_nn.c
endif
ifndef CGEMM_SMALL_K_B0_NT
CGEMM_SMALL_K_B0_NT = ../generic/zgemm_small_matrix_kernel_b0_nt.c
CGEMM_SMALL_K_B0_NT = ../generic/zgemm_small_matrix_kernel_nt.c
endif
ifndef CGEMM_SMALL_K_B0_TN
CGEMM_SMALL_K_B0_TN = ../generic/zgemm_small_matrix_kernel_b0_tn.c
CGEMM_SMALL_K_B0_TN = ../generic/zgemm_small_matrix_kernel_tn.c
endif
ifndef CGEMM_SMALL_K_B0_TT
CGEMM_SMALL_K_B0_TT = ../generic/zgemm_small_matrix_kernel_b0_tt.c
CGEMM_SMALL_K_B0_TT = ../generic/zgemm_small_matrix_kernel_tt.c
endif
$(KDIR)cgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NN)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_nr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NN)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNR $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNR -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NN)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRN $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRN -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_rr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NN)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRR $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRR -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NT)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNT $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNT -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_nc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NT)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NT)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRT $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRT -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_rc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NT)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRC $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRC -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TN)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTN $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTN -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_tr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TN)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTR $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTR -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TN)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_cr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TN)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCR $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCR -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TT)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTT $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTT -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_tc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TT)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTC $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTC -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TT)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCT -DB0 $< -o $@
$(KDIR)cgemm_small_kernel_b0_cc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TT)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $@
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC -DB0 $< -o $@
ifndef ZGEMM_SMALL_M_PERMIT
ZGEMM_SMALL_M_PERMIT = ../generic/zgemm_small_matrix_permit.c
@ -4632,65 +4632,65 @@ $(KDIR)zgemm_small_kernel_cc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@
ifndef ZGEMM_SMALL_K_B0_NN
ZGEMM_SMALL_K_B0_NN = ../generic/zgemm_small_matrix_kernel_b0_nn.c
ZGEMM_SMALL_K_B0_NN = ../generic/zgemm_small_matrix_kernel_nn.c
endif
ifndef ZGEMM_SMALL_K_B0_NT
ZGEMM_SMALL_K_B0_NT = ../generic/zgemm_small_matrix_kernel_b0_nt.c
ZGEMM_SMALL_K_B0_NT = ../generic/zgemm_small_matrix_kernel_nt.c
endif
ifndef ZGEMM_SMALL_K_B0_TN
ZGEMM_SMALL_K_B0_TN = ../generic/zgemm_small_matrix_kernel_b0_tn.c
ZGEMM_SMALL_K_B0_TN = ../generic/zgemm_small_matrix_kernel_tn.c
endif
ifndef ZGEMM_SMALL_K_B0_TT
ZGEMM_SMALL_K_B0_TT = ../generic/zgemm_small_matrix_kernel_b0_tt.c
ZGEMM_SMALL_K_B0_TT = ../generic/zgemm_small_matrix_kernel_tt.c
endif
$(KDIR)zgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NN)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_nr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NN)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNR $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNR -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NN)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRN $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRN -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_rr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NN)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRR $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRR -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NT)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNT $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNT -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_nc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NT)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NT)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRT $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRT -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_rc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NT)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRC $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRC -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TN)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTN $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTN -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_tr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TN)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTR $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTR -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TN)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_cr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TN)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCR $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCR -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TT)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTT $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTT -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_tc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TT)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTC $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTC -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TT)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCT $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCT -DB0 $< -o $@
$(KDIR)zgemm_small_kernel_b0_cc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TT)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC -DB0 $< -o $@

View File

@ -1,49 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb,FLOAT * C, BLASLONG ldc)
{
//naive implemtation
//Column major
BLASLONG i,j,k;
FLOAT result=0.0;
for(i=0; i<M; i++){
for(j=0; j<N; j++){
result=0.0;
for(k=0; k<K; k++){
result += A[i+k*lda] * B[k+j*ldb];
}
C[i+j*ldc]=alpha * result;
}
}
return 0;
}

View File

@ -1,49 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
{
//naive implemtation
//Column major
BLASLONG i,j,k;
FLOAT result=0.0;
for(i=0; i<M; i++){
for(j=0; j<N; j++){
result=0.0;
for(k=0; k<K; k++){
result += A[i+k*lda] * B[k*ldb+j];
}
C[i+j*ldc]=alpha * result;
}
}
return 0;
}

View File

@ -1,49 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb,FLOAT * C, BLASLONG ldc)
{
//naive implemtation
//Column major
BLASLONG i,j,k;
FLOAT result=0.0;
for(i=0; i<M; i++){
for(j=0; j<N; j++){
result=0.0;
for(k=0; k<K; k++){
result += A[i*lda+k] * B[k+j*ldb];
}
C[i+j*ldc]=alpha * result;
}
}
return 0;
}

View File

@ -1,49 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
{
//naive implemtation
//Column major
BLASLONG i,j,k;
FLOAT result=0.0;
for(i=0; i<M; i++){
for(j=0; j<N; j++){
result=0.0;
for(k=0; k<K; k++){
result += A[i*lda+k] * B[k*ldb+j];
}
C[i+j*ldc]=alpha * result;
}
}
return 0;
}

View File

@ -27,11 +27,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#ifdef B0
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb,FLOAT * C, BLASLONG ldc)
#else
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb, FLOAT beta, FLOAT * C, BLASLONG ldc)
#endif
{
//naive implemtation
//Column major
BLASLONG i,j,k;
FLOAT result=0.0;
@ -41,9 +45,12 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
for(k=0; k<K; k++){
result += A[i+k*lda] * B[k+j*ldb];
}
#ifdef B0
C[i+j*ldc]=alpha * result;
#else
C[i+j*ldc]=C[i+j*ldc] * beta + alpha * result;
#endif
}
}
return 0;
}

View File

@ -27,7 +27,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#ifdef B0
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
#else
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb, FLOAT beta, FLOAT * C, BLASLONG ldc)
#endif
{
//naive implemtation
//Column major
@ -41,9 +45,12 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
for(k=0; k<K; k++){
result += A[i+k*lda] * B[k*ldb+j];
}
#ifdef B0
C[i+j*ldc]=alpha * result;
#else
C[i+j*ldc]=C[i+j*ldc] * beta + alpha * result;
#endif
}
}
return 0;
}

View File

@ -27,7 +27,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#ifdef B0
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb,FLOAT * C, BLASLONG ldc)
#else
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb, FLOAT beta, FLOAT * C, BLASLONG ldc)
#endif
{
//naive implemtation
//Column major
@ -41,7 +45,11 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
for(k=0; k<K; k++){
result += A[i*lda+k] * B[k+j*ldb];
}
#ifdef B0
C[i+j*ldc]=alpha * result;
#else
C[i+j*ldc]=C[i+j*ldc] * beta + alpha * result;
#endif
}
}

View File

@ -27,7 +27,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#ifdef B0
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
#else
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha, FLOAT * B, BLASLONG ldb, FLOAT beta, FLOAT * C, BLASLONG ldc)
#endif
{
//naive implemtation
//Column major
@ -41,7 +45,11 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
for(k=0; k<K; k++){
result += A[i*lda+k] * B[k*ldb+j];
}
#ifdef B0
C[i+j*ldc]=alpha * result;
#else
C[i+j*ldc]=C[i+j*ldc] * beta + alpha * result;
#endif
}
}

View File

@ -1,74 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
{
FLOAT real, imag;
int i, j, l;
for(i = 0; i < M; i++){
for(j = 0; j < N; j++){
real=0;
imag=0;
for(l = 0; l < K; l++){
#if defined(NN)
real += (A[l*2*lda + 2*i]*B[j*2*ldb + 2*l]
-A[l*2*lda + 2*i + 1] * B[j*2*ldb + 2*l + 1]);
imag+=(A[l*2*lda + 2*i] * B[j*2*ldb + 2*l + 1]
+ A[l*2*lda + 2*i + 1] * B[j*2*ldb + 2*l]);
#elif defined(NR)
real += (A[l*2*lda + 2*i]*B[j*2*ldb + 2*l]
+A[l*2*lda + 2*i + 1] * B[j*2*ldb + 2*l + 1]);
imag+=(-A[l*2*lda + 2*i] * B[j*2*ldb + 2*l + 1]
+ A[l*2*lda + 2*i + 1] * B[j*2*ldb + 2*l]);
#elif defined(RN)
real += (A[l*2*lda + 2*i]*B[j*2*ldb + 2*l]
+A[l*2*lda + 2*i + 1] * B[j*2*ldb + 2*l + 1]);
imag+=(A[l*2*lda + 2*i] * B[j*2*ldb + 2*l + 1]
- A[l*2*lda + 2*i + 1] * B[j*2*ldb + 2*l]);
#elif defined(RR)
real += (A[l*2*lda + 2*i]*B[j*2*ldb + 2*l]
-A[l*2*lda + 2*i + 1] * B[j*2*ldb + 2*l + 1]);
imag+=(-A[l*2*lda + 2*i] * B[j*2*ldb + 2*l + 1]
- A[l*2*lda + 2*i + 1] * B[j*2*ldb + 2*l]);
#endif
}
C[j*2*ldc + 2*i] = alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = alpha0*imag + real*alpha1;
}
}
return 0;
}

View File

@ -1,77 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
{
FLOAT real, imag;
int i, j, l;
for(i = 0; i < M; i++){
for(j = 0; j < N; j++){
real=0;
imag=0;
for(l = 0; l < K; l++){
#if defined(NT)
real += (A[l*2*lda + 2*i]*B[l*2*ldb + 2*j]
-A[l*2*lda + 2*i + 1] * B[l*2*ldb + 2*j + 1]);
imag+=(A[l*2*lda + 2*i] * B[l*2*ldb + 2*j + 1]
+ A[l*2*lda + 2*i + 1] * B[l*2*ldb + 2*j]);
#elif defined(NC)
real += (A[l*2*lda + 2*i]*B[l*2*ldb + 2*j]
+A[l*2*lda + 2*i + 1] * B[l*2*ldb + 2*j + 1]);
imag+=(-A[l*2*lda + 2*i] * B[l*2*ldb + 2*j + 1]
+ A[l*2*lda + 2*i + 1] * B[l*2*ldb + 2*j]);
#elif defined(RT)
real += (A[l*2*lda + 2*i]*B[l*2*ldb + 2*j]
+A[l*2*lda + 2*i + 1] * B[l*2*ldb + 2*j + 1]);
imag+=(A[l*2*lda + 2*i] * B[l*2*ldb + 2*j + 1]
- A[l*2*lda + 2*i + 1] * B[l*2*ldb + 2*j]);
#elif defined(RC)
real += (A[l*2*lda + 2*i]*B[l*2*ldb + 2*j]
-A[l*2*lda + 2*i + 1] * B[l*2*ldb + 2*j + 1]);
imag+=(-A[l*2*lda + 2*i] * B[l*2*ldb + 2*j + 1]
- A[l*2*lda + 2*i + 1] * B[l*2*ldb + 2*j]);
#endif
}
C[j*2*ldc + 2*i] = alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = alpha0*imag + real*alpha1;
}
}
return 0;
}

View File

@ -1,77 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
{
FLOAT real, imag;
int i, j, l;
for(i = 0; i < M; i++){
for(j = 0; j < N; j++){
real=0;
imag=0;
for(l = 0; l < K; l++){
#if defined(TN)
real += (A[i*2*lda + 2*l]*B[j*2*ldb + 2*l]
-A[i*2*lda + 2*l + 1] * B[j*2*ldb + 2*l + 1]);
imag+=(A[i*2*lda + 2*l] * B[j*2*ldb + 2*l + 1]
+ A[i*2*lda + 2*l + 1] * B[j*2*ldb + 2*l]);
#elif defined(TR)
real += (A[i*2*lda + 2*l]*B[j*2*ldb + 2*l]
+A[i*2*lda + 2*l + 1] * B[j*2*ldb + 2*l + 1]);
imag+=(-A[i*2*lda + 2*l] * B[j*2*ldb + 2*l + 1]
+ A[i*2*lda + 2*l + 1] * B[j*2*ldb + 2*l]);
#elif defined(CN)
real += (A[i*2*lda + 2*l]*B[j*2*ldb + 2*l]
+A[i*2*lda + 2*l + 1] * B[j*2*ldb + 2*l + 1]);
imag+=(A[i*2*lda + 2*l] * B[j*2*ldb + 2*l + 1]
- A[i*2*lda + 2*l + 1] * B[j*2*ldb + 2*l]);
#elif defined(CR)
real += (A[i*2*lda + 2*l]*B[j*2*ldb + 2*l]
-A[i*2*lda + 2*l + 1] * B[j*2*ldb + 2*l + 1]);
imag+=(-A[i*2*lda + 2*l] * B[j*2*ldb + 2*l + 1]
- A[i*2*lda + 2*l + 1] * B[j*2*ldb + 2*l]);
#endif
}
C[j*2*ldc + 2*i] = alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = alpha0*imag + real*alpha1;
}
}
return 0;
}

View File

@ -1,77 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
{
FLOAT real, imag;
int i, j, l;
for(i = 0; i < M; i++){
for(j = 0; j < N; j++){
real=0;
imag=0;
for(l = 0; l < K; l++){
#if defined(TT)
real += (A[i*2*lda + 2*l]*B[l*2*ldb + 2*j]
-A[i*2*lda + 2*l + 1] * B[l*2*ldb + 2*j + 1]);
imag+=(A[i*2*lda + 2*l] * B[l*2*ldb + 2*j + 1]
+ A[i*2*lda + 2*l + 1] * B[l*2*ldb + 2*j]);
#elif defined(TC)
real += (A[i*2*lda + 2*l]*B[l*2*ldb + 2*j]
+A[i*2*lda + 2*l + 1] * B[l*2*ldb + 2*j + 1]);
imag+=(-A[i*2*lda + 2*l] * B[l*2*ldb + 2*j + 1]
+ A[i*2*lda + 2*l + 1] * B[l*2*ldb + 2*j]);
#elif defined(CT)
real += (A[i*2*lda + 2*l]*B[l*2*ldb + 2*j]
+A[i*2*lda + 2*l + 1] * B[l*2*ldb + 2*j + 1]);
imag+=(A[i*2*lda + 2*l] * B[l*2*ldb + 2*j + 1]
- A[i*2*lda + 2*l + 1] * B[l*2*ldb + 2*j]);
#elif defined(CC)
real += (A[i*2*lda + 2*l]*B[l*2*ldb + 2*j]
-A[i*2*lda + 2*l + 1] * B[l*2*ldb + 2*j + 1]);
imag+=(-A[i*2*lda + 2*l] * B[l*2*ldb + 2*j + 1]
- A[i*2*lda + 2*l + 1] * B[l*2*ldb + 2*j]);
#endif
}
C[j*2*ldc + 2*i] = alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = alpha0*imag + real*alpha1;
}
}
return 0;
}

View File

@ -27,10 +27,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#ifndef B0
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT beta0, FLOAT beta1, FLOAT * C, BLASLONG ldc)
#else
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
#endif
{
FLOAT real, imag;
#ifndef B0
FLOAT tmp0, tmp1;
#endif
int i, j, l;
for(i = 0; i < M; i++){
for(j = 0; j < N; j++){
@ -65,12 +71,17 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
#endif
}
#ifndef B0
tmp0 = beta0*C[j*2*ldc + 2*i] - beta1*C[j*2*ldc+ 2*i + 1];
tmp1 = beta0*C[j*2*ldc+ 2*i + 1] + beta1*C[j*2*ldc + 2*i];
C[j*2*ldc + 2*i] =tmp0+ alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = tmp1+ alpha0*imag + real*alpha1;
#else
C[j*2*ldc + 2*i] = alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = alpha0*imag + real*alpha1;
#endif
}
}

View File

@ -27,10 +27,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#ifndef B0
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT beta0, FLOAT beta1, FLOAT * C, BLASLONG ldc)
#else
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
#endif
{
FLOAT real, imag;
#ifndef B0
FLOAT tmp0, tmp1;
#endif
int i, j, l;
for(i = 0; i < M; i++){
for(j = 0; j < N; j++){
@ -69,12 +75,17 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
#endif
}
#ifndef B0
tmp0 = beta0*C[j*2*ldc + 2*i] - beta1*C[j*2*ldc+ 2*i + 1];
tmp1 = beta0*C[j*2*ldc+ 2*i + 1] + beta1*C[j*2*ldc + 2*i];
C[j*2*ldc + 2*i] =tmp0+ alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = tmp1+ alpha0*imag + real*alpha1;
#else
C[j*2*ldc + 2*i] = alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = alpha0*imag + real*alpha1;
#endif
}
}

View File

@ -27,10 +27,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#ifndef B0
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT beta0, FLOAT beta1, FLOAT * C, BLASLONG ldc)
#else
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
#endif
{
FLOAT real, imag;
#ifndef B0
FLOAT tmp0, tmp1;
#endif
int i, j, l;
for(i = 0; i < M; i++){
for(j = 0; j < N; j++){
@ -69,12 +75,17 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
#endif
}
#ifndef B0
tmp0 = beta0*C[j*2*ldc + 2*i] - beta1*C[j*2*ldc+ 2*i + 1];
tmp1 = beta0*C[j*2*ldc+ 2*i + 1] + beta1*C[j*2*ldc + 2*i];
C[j*2*ldc + 2*i] =tmp0+ alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = tmp1+ alpha0*imag + real*alpha1;
#else
C[j*2*ldc + 2*i] = alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = alpha0*imag + real*alpha1;
#endif
}
}

View File

@ -27,10 +27,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#ifndef B0
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT beta0, FLOAT beta1, FLOAT * C, BLASLONG ldc)
#else
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alpha0, FLOAT alpha1, FLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc)
#endif
{
FLOAT real, imag;
#ifndef B0
FLOAT tmp0, tmp1;
#endif
int i, j, l;
for(i = 0; i < M; i++){
for(j = 0; j < N; j++){
@ -69,12 +75,17 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
#endif
}
#ifndef B0
tmp0 = beta0*C[j*2*ldc + 2*i] - beta1*C[j*2*ldc+ 2*i + 1];
tmp1 = beta0*C[j*2*ldc+ 2*i + 1] + beta1*C[j*2*ldc + 2*i];
C[j*2*ldc + 2*i] =tmp0+ alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = tmp1+ alpha0*imag + real*alpha1;
#else
C[j*2*ldc + 2*i] = alpha0*real - alpha1*imag;
C[j*2*ldc+ 2*i + 1] = alpha0*imag + real*alpha1;
#endif
}
}