diff --git a/common_power.h b/common_power.h index 889205c75..76b9f0f32 100644 --- a/common_power.h +++ b/common_power.h @@ -39,6 +39,35 @@ #ifndef COMMON_POWER #define COMMON_POWER +#define str(x) #x + +#ifdef OS_AIX +#define XXSPLTD(T,A,z) xxpermdi T, A, A, 0b##z##z +#define XXMRGHD(T,A,B) xxpermdi T, A, B, 0b00 +#define XXMRGLD(T,A,B) xxpermdi T, A, B, 0b11 +#define XXSWAPD(T,A) xxpermdi T, A, A, 0b10 +#define XVMOVDP(T,A) xvcpsgndp T, A, A + +#define XXSPLTD_S(T,A,z) "xxpermdi " str(T) ", " str(A) ", " str(A) ", 0b" str(z ## z) " \n\t" +#define XXMRGHD_S(T,A,B) "xxpermdi " str(T) ", " str(A) ", " str(B) ", 0b00 \n\t" +#define XXMRGLD_S(T,A,B) "xxpermdi " str(T) ", " str(A) ", " str(B) ", 0b11 \n\t" +#define XXSWAPD_S(T,A) "xxpermdi " str(T) ", " str(A) ", " str(A) ", 0b10 \n\t" + +#else +#define XXSPLTD(T,A,z) xxspltd T, A, z +#define XXMRGHD(T,A,B) xxmrghd T, A, B +#define XXMRGLD(T,A,B) xxmrgld T, A, B +#define XXSWAPD(T,A) xxswapd T, A +#define XVMOVDP(T,A) xvmovdp T, A +/* String-literal forms: operands go through str() because macro parameters are not substituted inside quotes. */ +#define XXSPLTD_S(T,A,z) "xxspltd " str(T) ", " str(A) ", " str(z) " \n\t" +#define XXMRGHD_S(T,A,B) "xxmrghd " str(T) ", " str(A) ", " str(B) " \n\t" +#define XXMRGLD_S(T,A,B) "xxmrgld " str(T) ", " str(A) ", " str(B) " \n\t" +#define XXSWAPD_S(T,A) "xxswapd " str(T) ", " str(A) " \n\t" + +#endif + + #if defined(POWER8) || defined(POWER9) #define MB __asm__ __volatile__ ("eieio":::"memory") #define WMB __asm__ __volatile__ ("eieio":::"memory") diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index f83def47b..ed8ae406f 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -57,8 +57,6 @@ USE_TRMM = 1 endif - - SKERNELOBJS += \ sgemm_kernel$(TSUFFIX).$(SUFFIX) \ $(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \ @@ -436,7 +434,10 @@ $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(SGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SGEMMOTCOPY) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ + $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemmotcopy.s + m4 sgemmotcopy.s > sgemmotcopy_nomacros.s + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX 
sgemmotcopy_nomacros.s -o $@ + rm sgemmotcopy.s sgemmotcopy_nomacros.s ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) @@ -444,12 +445,17 @@ $(KDIR)$(SGEMMINCOPYOBJ) : $(KERNELDIR)/$(SGEMMINCOPY) $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(SGEMMITCOPYOBJ) : $(KERNELDIR)/$(SGEMMITCOPY) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - + $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemmitcopy.s + m4 sgemmitcopy.s > sgemmitcopy_nomacros.s + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmitcopy_nomacros.s -o $@ + rm sgemmitcopy.s sgemmitcopy_nomacros.s endif $(KDIR)$(DGEMMONCOPYOBJ) : $(KERNELDIR)/$(DGEMMONCOPY) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_ncopy.s + m4 dgemm_ncopy.s > dgemm_ncopy_nomacros.s + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_ncopy_nomacros.s -o $@ + rm dgemm_ncopy.s dgemm_ncopy_nomacros.s $(KDIR)$(DGEMMOTCOPYOBJ) : $(KERNELDIR)/$(DGEMMOTCOPY) $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ @@ -460,7 +466,10 @@ $(KDIR)$(DGEMMINCOPYOBJ) : $(KERNELDIR)/$(DGEMMINCOPY) $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(DGEMMITCOPYOBJ) : $(KERNELDIR)/$(DGEMMITCOPY) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_itcopy.s + m4 dgemm_itcopy.s > dgemm_itcopy_nomacros.s + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_itcopy_nomacros.s -o $@ + rm dgemm_itcopy.s dgemm_itcopy_nomacros.s endif @@ -485,10 +494,16 @@ endif endif $(KDIR)$(CGEMMONCOPYOBJ) : $(KERNELDIR)/$(CGEMMONCOPY) +# $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o cgemm_oncopy.s +# m4 cgemm_oncopy.s > cgemm_oncopy_nomacros.s $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ +# rm cgemm_oncopy.s cgemm_oncopy_nomacros.s $(KDIR)$(CGEMMOTCOPYOBJ) : $(KERNELDIR)/$(CGEMMOTCOPY) +# $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o cgemm_otcopy.s +# m4 cgemm_otcopy.s > cgemm_otcopy_nomacros.s $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ +# rm cgemm_otcopy.s cgemm_otcopy_nomacros.s ifneq 
($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) @@ -496,7 +511,10 @@ $(KDIR)$(CGEMMINCOPYOBJ) : $(KERNELDIR)/$(CGEMMINCOPY) $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(CGEMMITCOPYOBJ) : $(KERNELDIR)/$(CGEMMITCOPY) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ + $(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -E $< -o cgemm_itcopy.s + m4 cgemm_itcopy.s > cgemm_itcopy_nomacros.s + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX cgemm_itcopy_nomacros.s -o $@ + rm cgemm_itcopy.s cgemm_itcopy_nomacros.s endif @@ -512,7 +530,10 @@ $(KDIR)$(ZGEMMINCOPYOBJ) : $(KERNELDIR)/$(ZGEMMINCOPY) $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(ZGEMMITCOPYOBJ) : $(KERNELDIR)/$(ZGEMMITCOPY) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o zgemm_itcopy.s + m4 zgemm_itcopy.s > zgemm_itcopy_nomacros.s + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX zgemm_itcopy_nomacros.s -o $@ + rm zgemm_itcopy.s zgemm_itcopy_nomacros.s endif @@ -537,37 +558,67 @@ endif endif $(KDIR)sgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ + $(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemm_kernel$(TSUFFIX).s + m4 sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemm_kernel$(TSUFFIX)_nomacros.s -o $@ + rm sgemm_kernel$(TSUFFIX).s sgemm_kernel$(TSUFFIX)_nomacros.s $(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + $(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_kernel$(TSUFFIX).s + m4 dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_kernel$(TSUFFIX)_nomacros.s -o $@ + rm dgemm_kernel$(TSUFFIX).s dgemm_kernel$(TSUFFIX)_nomacros.s $(KDIR)qgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(QGEMMDEPEND) $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ $(KDIR)cgemm_kernel_n$(TSUFFIX).$(SUFFIX) : 
$(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $@ + $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DNN $< -o cgemm_kernel_n.s + m4 cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN cgemm_kernel_n_nomacros.s -o $@ + rm cgemm_kernel_n.s cgemm_kernel_n_nomacros.s $(KDIR)cgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN $< -o $@ + $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DCN $< -o cgemm_kernel_l.s + m4 cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN cgemm_kernel_l_nomacros.s -o $@ + rm cgemm_kernel_l.s cgemm_kernel_l_nomacros.s $(KDIR)cgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC $< -o $@ + $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s + m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ + rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s $(KDIR)cgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $@ + $(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DCC $< -o cgemm_kernel_b.s + m4 cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC cgemm_kernel_b_nomacros.s -o $@ + rm cgemm_kernel_b.s cgemm_kernel_b_nomacros.s $(KDIR)zgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@ + $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DNN $< -o zgemm_kernel_n.s + m4 zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@ + rm zgemm_kernel_n.s zgemm_kernel_n_nomacros.s $(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) - $(CC) $(CFLAGS) -c -DDOUBLE 
-DCOMPLEX -DCN $< -o $@ + $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DCN $< -o zgemm_kernel_l.s + m4 zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@ + rm zgemm_kernel_l.s zgemm_kernel_l_nomacros.s $(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC $< -o $@ + $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DNC $< -o zgemm_kernel_r.s + m4 zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@ + rm zgemm_kernel_r.s zgemm_kernel_r_nomacros.s $(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@ + $(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DCC $< -o zgemm_kernel_b.s + m4 zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@ + rm zgemm_kernel_b.s zgemm_kernel_b_nomacros.s $(KDIR)xgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DNN $< -o $@ @@ -584,28 +635,56 @@ $(KDIR)xgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMD ifdef USE_TRMM $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o strmmkernel_ln.s + m4 strmmkernel_ln.s > strmmkernel_ln_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA strmmkernel_ln_nomacros.s -o $@ + rm strmmkernel_ln.s strmmkernel_ln_nomacros.s $(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o strmmkernel_lt.s + m4 strmmkernel_lt.s 
> strmmkernel_lt_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA strmmkernel_lt_nomacros.s -o $@ + rm strmmkernel_lt.s strmmkernel_lt_nomacros.s $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o strmmkernel_rn.s + m4 strmmkernel_rn.s > strmmkernel_rn_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA strmmkernel_rn_nomacros.s -o $@ + rm strmmkernel_rn.s strmmkernel_rn_nomacros.s $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s + m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ + rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o dtrmm_kernel_ln.s +# $(CC) $(CFLAGS) -E $< -o dtrmm_kernel_ln.s + m4 dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA dtrmm_kernel_ln_nomacros.s -o $@ + rm dtrmm_kernel_ln.s dtrmm_kernel_ln_nomacros.s $(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o dtrmm_kernel_lt.s +# $(CC) $(CFLAGS) -E $< -o dtrmm_kernel_lt.s + m4 dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA dtrmm_kernel_lt_nomacros.s 
-o $@ + rm dtrmm_kernel_lt.s dtrmm_kernel_lt_nomacros.s $(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o dtrmm_kernel_rn.s +# $(CC) $(CFLAGS) -E $< -o dtrmm_kernel_rn.s + m4 dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA dtrmm_kernel_rn_nomacros.s -o $@ + rm dtrmm_kernel_rn.s dtrmm_kernel_rn_nomacros.s $(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o dtrmm_kernel_rt.s +# $(CC) $(CFLAGS) -E $< -o dtrmm_kernel_rt.s + m4 dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA dtrmm_kernel_rt_nomacros.s -o $@ + rm dtrmm_kernel_rt.s dtrmm_kernel_rt_nomacros.s $(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ @@ -620,52 +699,100 @@ $(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ $(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_ln.s + m4 ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_ln_nomacros.s -o $@ + rm ctrmm_kernel_ln.s ctrmm_kernel_ln_nomacros.s $(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ 
-DNN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_lt.s + m4 ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_lt_nomacros.s -o $@ + rm ctrmm_kernel_lt.s ctrmm_kernel_lt_nomacros.s $(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lr.s + m4 ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ctrmm_kernel_lr_nomacros.s -o $@ + rm ctrmm_kernel_lr.s ctrmm_kernel_lr_nomacros.s $(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lc.s + m4 ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ctrmm_kernel_lc_nomacros.s -o $@ + rm ctrmm_kernel_lc_nomacros.s ctrmm_kernel_lc.s $(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rn.s + m4 ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_rn_nomacros.s -o $@ + rm ctrmm_kernel_rn.s ctrmm_kernel_rn_nomacros.s $(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE 
-DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rt.s + m4 ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_rt_nomacros.s -o $@ + rm ctrmm_kernel_rt.s ctrmm_kernel_rt_nomacros.s $(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ctrmm_kernel_rr.s + m4 ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ctrmm_kernel_rr_nomacros.s -o $@ + rm ctrmm_kernel_rr.s ctrmm_kernel_rr_nomacros.s $(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ctrmm_kernel_RC.s + m4 ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ctrmm_kernel_RC_nomacros.s -o $@ + rm ctrmm_kernel_RC.s ctrmm_kernel_RC_nomacros.s $(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_ln.s + m4 ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@ + rm ztrmm_kernel_ln.s ztrmm_kernel_ln_nomacros.s $(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_lt.s + 
m4 ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@ + rm ztrmm_kernel_lt.s ztrmm_kernel_lt_nomacros.s $(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lr.s + m4 ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@ + rm ztrmm_kernel_lr.s ztrmm_kernel_lr_nomacros.s $(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lc.s + m4 ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@ + rm ztrmm_kernel_lc.s ztrmm_kernel_lc_nomacros.s $(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rn.s + m4 ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@ + rm ztrmm_kernel_rn.s ztrmm_kernel_rn_nomacros.s $(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rt.s + m4 ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s + $(CC) 
$(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@ + rm ztrmm_kernel_rt.s ztrmm_kernel_rt_nomacros.s $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rr.s + m4 ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@ + rm ztrmm_kernel_rr.s ztrmm_kernel_rr_nomacros.s $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rc.s + m4 ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@ + rm ztrmm_kernel_rc.s ztrmm_kernel_rc_nomacros.s else $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ @@ -677,7 +804,10 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + $(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s + m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s + $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ + rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(CC) $(CFLAGS) -c 
-DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ @@ -801,10 +931,16 @@ $(KDIR)strsm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRSMKERNEL_RT) $(ST $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -UDOUBLE -UUPPER -DRT -UCONJ $< -o $@ $(KDIR)dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LN) $(DTRSMDEPEND) +# $(CC) $(CFLAGS) -E $< -o dtrsm_kernel_ln.s +# m4 dtrsm_kernel_ln.s > dtrsm_kernel_ln_nomacros.s $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -DUPPER -DLN -UCONJ $< -o $@ +# rm dtrsm_kernel_ln.s dtrsm_kernel_ln_nomacros.s $(KDIR)dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LT) $(DTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o $@ + $(CC) $(CFLAGS) -E -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o dtrsm_kernel_lt.s + m4 dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s + $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ dtrsm_kernel_lt_nomacros.s -o $@ + rm dtrsm_kernel_lt.s dtrsm_kernel_lt_nomacros.s $(KDIR)dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_RN) $(DTRSMDEPEND) $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -DUPPER -DRN -UCONJ $< -o $@ @@ -1940,7 +2076,7 @@ $(SGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMITCOPY) endif -$(DGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(DGEMMONCOPY) +$(D cgemm_kernel_r_nomacros.s + $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ + rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s $(KDIR)cgemm_kernel_b$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $@ @@ -2083,7 +2222,10 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ $(KDIR)strmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ + $(CC) $(PFLAGS) -E -DTRMMKERNEL 
-UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s + m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s + $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ + rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ diff --git a/kernel/power/casum_microk_power8.c b/kernel/power/casum_microk_power8.c index 7d12c9885..91d53ffc3 100644 --- a/kernel/power/casum_microk_power8.c +++ b/kernel/power/casum_microk_power8.c @@ -68,10 +68,10 @@ static float casum_kernel_16 (long n, float *x) "addi %2, %2, 128 \n\t" "addic. %1, %1, -16 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvabssp 48, 40 \n\t" "xvabssp 49, 41 \n\t" @@ -108,9 +108,9 @@ static float casum_kernel_16 (long n, float *x) "xvaddsp 38, 38, %x5 \n\t" "xvaddsp 39, 39, %x6 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvabssp 48, 40 \n\t" "xvabssp 49, 41 \n\t" diff --git a/kernel/power/ccopy_microk_power8.c b/kernel/power/ccopy_microk_power8.c index 613c4d286..6a7886e6f 100644 --- a/kernel/power/ccopy_microk_power8.c +++ b/kernel/power/ccopy_microk_power8.c @@ -62,10 +62,10 @@ static void ccopy_kernel_32 (long n, float *x, float *y) "addi %2, %2, 128 \n\t" "addic. %1, %1, -32 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "stxvd2x 32, 0, %3 \n\t" "stxvd2x 33, %5, %3 \n\t" @@ -108,9 +108,9 @@ static void ccopy_kernel_32 (long n, float *x, float *y) "addi %2, %2, 128 \n\t" "addic. 
%1, %1, -32 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "stxvd2x 32, 0, %3 \n\t" "stxvd2x 33, %5, %3 \n\t" diff --git a/kernel/power/cgemm_macros_8x4_power8.S b/kernel/power/cgemm_macros_8x4_power8.S index 9a18cb189..46108bbb4 100644 --- a/kernel/power/cgemm_macros_8x4_power8.S +++ b/kernel/power/cgemm_macros_8x4_power8.S @@ -83,7 +83,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=4 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x8_1', ` +#else .macro LOAD4x8_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 lxvw4x vs1, o16, AO // load a2, a3 @@ -107,9 +111,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_I1', ` +#else .macro KERNEL4x8_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -172,9 +184,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs63, vs3, vs15 // a3_r*b3_i, a3_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_1', ` +#else .macro KERNEL4x8_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -237,9 +257,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs3, vs15 // a3_r*b3_i, a3_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_2', ` +#else .macro KERNEL4x8_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -302,9 +330,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs63, vs7, vs23 // a7_r*b3_i, a7_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_E2', ` +#else .macro KERNEL4x8_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -344,9 +380,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs7, vs23 // a7_r*b3_i, a7_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUBI1', ` +#else .macro KERNEL4x8_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -409,9 +453,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs63, vs3, vs15 // a3_r*b3_i, a3_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUB1', ` +#else .macro KERNEL4x8_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -474,9 +526,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs3, vs15 // a3_r*b3_i, a3_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x8', ` +#else .macro SAVE4x8 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -1546,14 +1606,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x4_1', ` +#else .macro LOAD4x4_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 lxvw4x vs1, o16, AO // load a2, a3 @@ -1575,9 +1643,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_I1', ` +#else .macro KERNEL4x4_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -1622,9 +1698,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs1, vs15 // a1_r*b3_i, a1_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_1', ` +#else .macro KERNEL4x4_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -1669,9 +1753,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs1, vs15 // a1_r*b3_i, a1_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_2', ` +#else .macro KERNEL4x4_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -1716,9 +1808,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs5, vs23 // a5_r*b3_i, a5_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_E2', ` +#else .macro KERNEL4x4_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -1742,9 +1842,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs5, vs23 // a5_r*b3_i, a5_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUBI1', ` +#else .macro KERNEL4x4_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -1789,9 +1897,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs1, vs15 // a1_r*b3_i, a1_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUB1', ` +#else .macro KERNEL4x4_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -1836,9 +1952,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs47, vs1, vs15 // a1_r*b3_i, a1_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x4', ` +#else .macro SAVE4x4 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -2388,14 +2512,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x2_1', ` +#else .macro LOAD4x2_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -2416,9 +2548,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_I1', ` +#else .macro KERNEL4x2_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -2454,9 +2594,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs0, vs15 // a0_r*b3_i, a0_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_1', ` +#else .macro KERNEL4x2_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -2492,9 +2640,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs0, vs15 // a0_r*b3_i, a0_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_2', ` +#else .macro KERNEL4x2_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -2530,9 +2686,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs39, vs4, vs23 // a4_r*b3_i, a4_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_E2', ` +#else .macro KERNEL4x2_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -2548,9 +2712,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs4, vs23 // a4_r*b3_i, a4_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUBI1', ` +#else .macro KERNEL4x2_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -2586,9 +2758,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs0, vs15 // a0_r*b3_i, a0_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUB1', ` +#else .macro KERNEL4x2_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -2624,9 +2804,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs0, vs15 // a0_r*b3_i, a0_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x2', ` +#else .macro SAVE4x2 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -2916,14 +3104,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x1_1', ` +#else .macro LOAD4x1_1 +#endif lxsspx vs0, o0, AO // load a0_r lxsspx vs1, o4, AO // load a0_i @@ -2945,9 +3141,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_I1', ` +#else .macro KERNEL4x1_I1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -2992,9 +3196,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs47, vs1, vs14 // a0_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_1', ` +#else .macro KERNEL4x1_1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -3039,9 +3251,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs1, vs14 // a0_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_2', ` +#else .macro KERNEL4x1_2 +#endif lxsspx vs0, o0, AO // load a0_r @@ -3086,9 +3306,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs5, vs22 // a4_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_E2', ` +#else .macro KERNEL4x1_E2 +#endif xsmaddadp vs32, vs4, vs16 // a4_r*b0_r @@ -3112,9 +3340,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs5, vs22 // a4_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUBI1', ` +#else .macro KERNEL4x1_SUBI1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -3159,9 +3395,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs47, vs1, vs14 // a0_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUB1', ` +#else .macro KERNEL4x1_SUB1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -3206,9 +3450,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmaddadp vs47, vs1, vs14 // a0_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x1', ` +#else .macro SAVE4x1 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -3382,14 +3634,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x8_1', ` +#else .macro LOAD2x8_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 lxvw4x vs1, o16, AO // load a2, a3 @@ -3406,9 +3666,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_I1', ` +#else .macro KERNEL2x8_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -3446,9 +3714,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs3, vs11 // a3_r*b1_i, a3_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_1', ` +#else .macro KERNEL2x8_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -3486,9 +3762,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs3, vs11 // a3_r*b1_i, a3_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_2', ` +#else .macro KERNEL2x8_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -3526,9 +3810,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs47, vs7, vs19 // a7_r*b1_i, a7_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_E2', ` +#else .macro KERNEL2x8_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -3550,9 +3842,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs7, vs19 // a7_r*b1_i, a7_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUBI1', ` +#else .macro KERNEL2x8_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -3590,9 +3890,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs3, vs11 // a3_r*b1_i, a3_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUB1', ` +#else .macro KERNEL2x8_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -3630,9 +3938,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs3, vs11 // a3_r*b1_i, a3_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x8', ` +#else .macro SAVE2x8 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -4170,14 +4486,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x4_1', ` +#else .macro LOAD2x4_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 lxvw4x vs1, o16, AO // load a2, a3 @@ -4192,9 +4516,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_I1', ` +#else .macro KERNEL2x4_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -4222,9 +4554,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs1, vs11 // a1_r*b1_i, a1_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_1', ` +#else .macro KERNEL2x4_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -4252,9 +4592,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs1, vs11 // a1_r*b1_i, a1_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_2', ` +#else .macro KERNEL2x4_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4282,9 +4630,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs5, vs19 // a5_r*b1_i, a5_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_E2', ` +#else .macro KERNEL2x4_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -4298,9 +4654,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs5, vs19 // a5_r*b1_i, a5_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUBI1', ` +#else .macro KERNEL2x4_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4328,9 +4692,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs1, vs11 // a1_r*b1_i, a1_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUB1', ` +#else .macro KERNEL2x4_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4358,9 +4730,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs39, vs1, vs11 // a1_r*b1_i, a1_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x4', ` +#else .macro SAVE2x4 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -4638,14 +5018,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x2_1', ` +#else .macro LOAD2x2_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4659,9 +5047,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_I1', ` +#else .macro KERNEL2x2_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -4684,9 +5080,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs0, vs11 // a0_r*b1_i, a0_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_1', ` +#else .macro KERNEL2x2_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -4709,9 +5113,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs0, vs11 // a0_r*b1_i, a0_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_2', ` +#else .macro KERNEL2x2_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4734,9 +5146,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs35, vs4, vs19 // a4_r*b1_i, a4_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_E2', ` +#else .macro KERNEL2x2_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -4746,9 +5166,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs4, vs19 // a4_r*b1_i, a4_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUBI1', ` +#else .macro KERNEL2x2_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4771,9 +5199,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs0, vs11 // a0_r*b1_i, a0_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUB1', ` +#else .macro KERNEL2x2_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4796,9 +5232,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs0, vs11 // a0_r*b1_i, a0_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x2', ` +#else .macro SAVE2x2 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -4946,14 +5390,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x1_1', ` +#else .macro LOAD2x1_1 +#endif lxsspx vs0, o0, AO // load a0_r lxsspx vs1, o4, AO // load a0_i @@ -4968,9 +5420,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_I1', ` +#else .macro KERNEL2x1_I1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -4998,9 +5458,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs39, vs1, vs10 // a0_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_1', ` +#else .macro KERNEL2x1_1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -5028,9 +5496,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs1, vs10 // a0_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_2', ` +#else .macro KERNEL2x1_2 +#endif lxsspx vs0, o0, AO // load a0_r @@ -5058,9 +5534,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs5, vs18 // a4_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_E2', ` +#else .macro KERNEL2x1_E2 +#endif xsmaddadp vs32, vs4, vs16 // a4_r*b0_r @@ -5074,9 +5558,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs5, vs18 // a4_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUBI1', ` +#else .macro KERNEL2x1_SUBI1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -5104,9 +5596,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs39, vs1, vs10 // a0_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUB1', ` +#else .macro KERNEL2x1_SUB1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -5134,9 +5634,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmaddadp vs39, vs1, vs10 // a0_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x1', ` +#else .macro SAVE2x1 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -5226,14 +5734,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x8_1', ` +#else .macro LOAD1x8_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 lxvw4x vs1, o16, AO // load a2, a3 @@ -5247,9 +5763,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_I1', ` +#else .macro KERNEL1x8_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -5275,9 +5799,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs3, vs9 // a3_r*b0_i, a3_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_1', ` +#else .macro KERNEL1x8_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -5303,9 +5835,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs3, vs9 // a3_r*b0_i, a3_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_2', ` +#else .macro KERNEL1x8_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5331,9 +5871,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs39, vs7, vs17 // a7_r*b0_i, a7_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_E2', ` +#else .macro KERNEL1x8_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -5346,9 +5894,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs7, vs17 // a7_r*b0_i, a7_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUBI1', ` +#else .macro KERNEL1x8_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5374,9 +5930,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs3, vs9 // a3_r*b0_i, a3_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUB1', ` +#else .macro KERNEL1x8_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5402,9 +5966,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs3, vs9 // a3_r*b0_i, a3_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x8', ` +#else .macro SAVE1x8 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -5676,14 +6248,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x4_1', ` +#else .macro LOAD1x4_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 lxvw4x vs1, o16, AO // load a2, a3 @@ -5695,9 +6275,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_I1', ` +#else .macro KERNEL1x4_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -5717,9 +6305,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs1, vs9 // a1_r*b0_i, a1_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_1', ` +#else .macro KERNEL1x4_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -5739,9 +6335,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs1, vs9 // a1_r*b0_i, a1_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_2', ` +#else .macro KERNEL1x4_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5761,9 +6365,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs5, vs17 // a5_r*b0_i, a5_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_E2', ` +#else .macro KERNEL1x4_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -5772,9 +6384,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs5, vs17 // a5_r*b0_i, a5_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUBI1', ` +#else .macro KERNEL1x4_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5794,9 +6414,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs1, vs9 // a1_r*b0_i, a1_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUB1', ` +#else .macro KERNEL1x4_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5816,9 +6444,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs35, vs1, vs9 // a1_r*b0_i, a1_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x4', ` +#else .macro SAVE1x4 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -5960,14 +6596,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x2_1', ` +#else .macro LOAD1x2_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5978,9 +6622,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_I1', ` +#else .macro KERNEL1x2_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -5997,9 +6649,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs33, vs0, vs9 // a0_r*b0_i, a0_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_1', ` +#else .macro KERNEL1x2_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -6016,9 +6676,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs0, vs9 // a0_r*b0_i, a0_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_2', ` +#else .macro KERNEL1x2_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -6035,18 +6703,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs33, vs4, vs17 // a4_r*b0_i, a4_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_E2', ` +#else .macro KERNEL1x2_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r xvmaddasp vs33, vs4, vs17 // a4_r*b0_i, a4_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUBI1', ` +#else .macro KERNEL1x2_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -6063,9 +6747,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs33, vs0, vs9 // a0_r*b0_i, a0_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUB1', ` +#else .macro KERNEL1x2_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -6082,9 +6774,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs0, vs9 // a0_r*b0_i, a0_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x2', ` +#else .macro SAVE1x2 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -6161,14 +6861,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x1_1', ` +#else .macro LOAD1x1_1 +#endif lxsspx vs0, o0, AO // load a0_r lxsspx vs1, o4, AO // load a0_i @@ -6180,9 +6888,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_I1', ` +#else .macro KERNEL1x1_I1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -6202,9 +6918,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs35, vs1, vs8 // a0_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_1', ` +#else .macro KERNEL1x1_1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -6224,9 +6948,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs1, vs8 // a0_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_2', ` +#else .macro KERNEL1x1_2 +#endif lxsspx vs0, o0, AO // load a0_r @@ -6246,9 +6978,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs5, vs16 // a4_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_E2', ` +#else .macro KERNEL1x1_E2 +#endif xsmaddadp vs32, vs4, vs16 // a4_r*b0_r @@ -6257,9 +6997,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs5, vs16 // a4_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUBI1', ` +#else .macro KERNEL1x1_SUBI1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -6279,9 +7027,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs35, vs1, vs8 // a0_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUB1', ` +#else .macro KERNEL1x1_SUB1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -6301,9 +7057,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmaddadp vs35, vs1, vs8 // a0_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x1', ` +#else .macro SAVE1x1 +#endif mr T1, CO xxlxor vs24, vs24, vs24 @@ -6351,5 +7115,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/cgemm_tcopy_macros_8_power8.S b/kernel/power/cgemm_tcopy_macros_8_power8.S index 03fda2766..64bf8dd99 100644 --- a/kernel/power/cgemm_tcopy_macros_8_power8.S +++ b/kernel/power/cgemm_tcopy_macros_8_power8.S @@ -38,7 +38,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=4 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x8', ` +#else .macro COPY_4x8 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -93,13 +97,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs46, o32, T1 stxvw4x vs47, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x4', ` +#else .macro COPY_4x4 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -133,13 +145,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvw4x vs38, o32, T1 stxvw4x vs39, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x2', ` +#else .macro COPY_4x2 +#endif lxvw4x vs32, o0, A0 addi A0, A0, 16 @@ -163,13 +183,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x1', ` +#else .macro COPY_4x1 +#endif lxsspx vs32, o0, A0 lxsspx vs33, o4, A0 @@ -207,13 +235,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs38, o0, T1 stxsspx vs39, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x8', ` +#else .macro COPY_2x8 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -241,13 +277,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvw4x vs38, o32, T1 stxvw4x vs39, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x4', ` +#else .macro COPY_2x4 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -265,13 +309,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs34, o32, T1 stxvw4x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x2', ` +#else .macro COPY_2x2 +#endif lxvw4x vs32, o0, A0 addi A0, A0, 16 @@ -285,13 +337,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs33, o16, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x1', ` +#else .macro COPY_2x1 +#endif lxsspx vs32, o0, A0 lxsspx vs33, o4, A0 @@ -311,13 +371,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxsspx vs34, o0, T1 stxsspx vs35, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x8', ` +#else .macro COPY_1x8 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -332,13 +400,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs34, o32, T1 stxvw4x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x4', ` +#else .macro COPY_1x4 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -349,13 +425,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs32, o0, T1 stxvw4x vs33, o16, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x2', ` +#else .macro COPY_1x2 +#endif lxvw4x vs32, o0, A0 addi A0, A0, 16 @@ -364,13 +448,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvw4x vs32, o0, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x1', ` +#else .macro COPY_1x1 +#endif lxsspx vs32, o0, A0 lxsspx vs33, o4, A0 @@ -381,5 +473,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs32, o0, T1 stxsspx vs33, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/crot.c b/kernel/power/crot.c index 959a9eda0..2a5835546 100644 --- a/kernel/power/crot.c +++ b/kernel/power/crot.c @@ -56,9 +56,9 @@ static void crot_kernel_8 (long n, float *x, float *y, float c, float s) "addi %[x_ptr], %[x_ptr], 64 \n\t" "addi %[y_ptr], %[y_ptr], 64 \n\t" "addic. %[temp_n], %[temp_n], -8 \n\t" - "ble 2f \n\t" - ".p2align 5 \n\t" - "1: \n\t" + "ble two%= \n\t" + ".align 5 \n\t" + "one%=: \n\t" "xvmulsp 40, 32, 36 \n\t" // c * x "xvmulsp 41, 33, 36 \n\t" "xvmulsp 42, 34, 36 \n\t" @@ -104,8 +104,8 @@ static void crot_kernel_8 (long n, float *x, float *y, float c, float s) "addi %[x_ptr], %[x_ptr], 128 \n\t" "addi %[y_ptr], %[y_ptr], 128 \n\t" "addic. %[temp_n], %[temp_n], -8 \n\t" - "bgt 1b \n\t" - "2: \n\t" + "bgt one%= \n\t" + "two%=: \n\t" "xvmulsp 40, 32, 36 \n\t" // c * x "xvmulsp 41, 33, 36 \n\t" "xvmulsp 42, 34, 36 \n\t" diff --git a/kernel/power/cswap_microk_power8.c b/kernel/power/cswap_microk_power8.c index 8d7d0c0b9..829800230 100644 --- a/kernel/power/cswap_microk_power8.c +++ b/kernel/power/cswap_microk_power8.c @@ -39,8 +39,8 @@ static void cswap_kernel_32 (long n, float *x, float *y) { __asm__ ( - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "lxvd2x 32, 0, %4 \n\t" "lxvd2x 33, %5, %4 \n\t" @@ -131,7 +131,7 @@ static void cswap_kernel_32 (long n, float *x, float *y) "addi %4, %4, 128 \n\t" "addic. 
%2, %2, -32 \n\t" - "bgt 1b \n" + "bgt one%= \n" "#n=%2 x=%0=%3 y=%1=%4 o16=%5 o32=%6 o48=%7 o64=%8 o80=%9 o96=%10 o112=%11" : diff --git a/kernel/power/ctrmm_macros_8x4_power8.S b/kernel/power/ctrmm_macros_8x4_power8.S index 48a21252c..922cab57a 100644 --- a/kernel/power/ctrmm_macros_8x4_power8.S +++ b/kernel/power/ctrmm_macros_8x4_power8.S @@ -83,7 +83,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=4 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x8_1', ` +#else .macro LOAD4x8_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -113,9 +117,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_I1', ` +#else .macro KERNEL4x8_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -184,9 +196,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs63, vs3, vs15 // a3_r*b3_i, a3_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_1', ` +#else .macro KERNEL4x8_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -255,9 +275,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs3, vs15 // a3_r*b3_i, a3_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_2', ` +#else .macro KERNEL4x8_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -326,9 +354,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs63, vs7, vs23 // a7_r*b3_i, a7_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_E2', ` +#else .macro KERNEL4x8_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -368,9 +404,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs7, vs23 // a7_r*b3_i, a7_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUBI1', ` +#else .macro KERNEL4x8_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -439,9 +483,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs63, vs3, vs15 // a3_r*b3_i, a3_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUB1', ` +#else .macro KERNEL4x8_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -510,9 +562,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs3, vs15 // a3_r*b3_i, a3_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x8', ` +#else .macro SAVE4x8 +#endif mr T1, CO @@ -1597,14 +1657,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x4_1', ` +#else .macro LOAD4x4_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -1630,9 +1698,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_I1', ` +#else .macro KERNEL4x4_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -1681,9 +1757,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs1, vs15 // a1_r*b3_i, a1_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_1', ` +#else .macro KERNEL4x4_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -1732,9 +1816,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs1, vs15 // a1_r*b3_i, a1_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_2', ` +#else .macro KERNEL4x4_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -1783,9 +1875,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs5, vs23 // a5_r*b3_i, a5_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_E2', ` +#else .macro KERNEL4x4_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -1809,9 +1909,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs5, vs23 // a5_r*b3_i, a5_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUBI1', ` +#else .macro KERNEL4x4_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -1860,9 +1968,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs1, vs15 // a1_r*b3_i, a1_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUB1', ` +#else .macro KERNEL4x4_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -1911,9 +2027,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs47, vs1, vs15 // a1_r*b3_i, a1_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x4', ` +#else .macro SAVE4x4 +#endif mr T1, CO @@ -2470,14 +2594,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x2_1', ` +#else .macro LOAD4x2_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -2501,9 +2633,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_I1', ` +#else .macro KERNEL4x2_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -2542,9 +2682,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs0, vs15 // a0_r*b3_i, a0_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_1', ` +#else .macro KERNEL4x2_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -2583,9 +2731,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs0, vs15 // a0_r*b3_i, a0_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_2', ` +#else .macro KERNEL4x2_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -2624,9 +2780,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs39, vs4, vs23 // a4_r*b3_i, a4_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_E2', ` +#else .macro KERNEL4x2_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -2642,9 +2806,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs4, vs23 // a4_r*b3_i, a4_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUBI1', ` +#else .macro KERNEL4x2_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -2683,9 +2855,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs0, vs15 // a0_r*b3_i, a0_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUB1', ` +#else .macro KERNEL4x2_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -2724,9 +2904,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs0, vs15 // a0_r*b3_i, a0_i*b3_i, a1_r*b3_i, a1_i*b3_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x2', ` +#else .macro SAVE4x2 +#endif mr T1, CO @@ -3019,14 +3207,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x1_1', ` +#else .macro LOAD4x1_1 +#endif lxsspx vs0, o0, AO // load a0_r lxsspx vs1, o4, AO // load a0_i @@ -3055,9 +3251,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_I1', ` +#else .macro KERNEL4x1_I1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -3109,9 +3313,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs47, vs1, vs14 // a0_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_1', ` +#else .macro KERNEL4x1_1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -3163,9 +3375,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs1, vs14 // a0_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_2', ` +#else .macro KERNEL4x1_2 +#endif lxsspx vs0, o0, AO // load a0_r @@ -3217,9 +3437,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs5, vs22 // a4_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_E2', ` +#else .macro KERNEL4x1_E2 +#endif xsmaddadp vs32, vs4, vs16 // a4_r*b0_r @@ -3243,9 +3471,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs5, vs22 // a4_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUBI1', ` +#else .macro KERNEL4x1_SUBI1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -3297,9 +3533,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs47, vs1, vs14 // a0_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUB1', ` +#else .macro KERNEL4x1_SUB1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -3351,9 +3595,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs1, vs14 // a0_i*b3_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x1', ` +#else .macro SAVE4x1 +#endif mr T1, CO @@ -3526,14 +3778,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x8_1', ` +#else .macro LOAD2x8_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -3556,9 +3816,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_I1', ` +#else .macro KERNEL2x8_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -3602,9 +3870,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs3, vs11 // a3_r*b1_i, a3_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_1', ` +#else .macro KERNEL2x8_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -3648,9 +3924,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs3, vs11 // a3_r*b1_i, a3_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_2', ` +#else .macro KERNEL2x8_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -3694,9 +3978,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs7, vs19 // a7_r*b1_i, a7_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_E2', ` +#else .macro KERNEL2x8_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -3718,9 +4010,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs47, vs7, vs19 // a7_r*b1_i, a7_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUBI1', ` +#else .macro KERNEL2x8_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -3764,9 +4064,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs3, vs11 // a3_r*b1_i, a3_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUB1', ` +#else .macro KERNEL2x8_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -3810,9 +4118,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs3, vs11 // a3_r*b1_i, a3_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x8', ` +#else .macro SAVE2x8 +#endif mr T1, CO @@ -4357,14 +4673,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x4_1', ` +#else .macro LOAD2x4_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4383,9 +4707,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_I1', ` +#else .macro KERNEL2x4_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -4417,9 +4749,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs39, vs1, vs11 // a1_r*b1_i, a1_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_1', ` +#else .macro KERNEL2x4_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -4451,9 +4791,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs1, vs11 // a1_r*b1_i, a1_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_2', ` +#else .macro KERNEL2x4_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4485,9 +4833,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs5, vs19 // a5_r*b1_i, a5_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_E2', ` +#else .macro KERNEL2x4_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -4501,9 +4857,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs5, vs19 // a5_r*b1_i, a5_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUBI1', ` +#else .macro KERNEL2x4_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4535,9 +4899,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs1, vs11 // a1_r*b1_i, a1_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUB1', ` +#else .macro KERNEL2x4_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4569,9 +4941,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs1, vs11 // a1_r*b1_i, a1_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x4', ` +#else .macro SAVE2x4 +#endif mr T1, CO @@ -4852,14 +5232,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x2_1', ` +#else .macro LOAD2x2_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4876,9 +5264,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_I1', ` +#else .macro KERNEL2x2_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -4904,9 +5300,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs0, vs11 // a0_r*b1_i, a0_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_1', ` +#else .macro KERNEL2x2_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -4932,9 +5336,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs0, vs11 // a0_r*b1_i, a0_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_2', ` +#else .macro KERNEL2x2_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -4960,9 +5372,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs4, vs19 // a4_r*b1_i, a4_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_E2', ` +#else .macro KERNEL2x2_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -4972,9 +5392,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs35, vs4, vs19 // a4_r*b1_i, a4_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUBI1', ` +#else .macro KERNEL2x2_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5000,9 +5428,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs0, vs11 // a0_r*b1_i, a0_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUB1', ` +#else .macro KERNEL2x2_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5028,9 +5464,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs0, vs11 // a0_r*b1_i, a0_i*b1_i, a1_r*b1_i, a1_i*b1_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x2', ` +#else .macro SAVE2x2 +#endif mr T1, CO @@ -5179,14 +5623,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x1_1', ` +#else .macro LOAD2x1_1 +#endif lxsspx vs0, o0, AO // load a0_r lxsspx vs1, o4, AO // load a0_i @@ -5205,9 +5657,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_I1', ` +#else .macro KERNEL2x1_I1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -5239,9 +5699,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs39, vs1, vs10 // a0_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_1', ` +#else .macro KERNEL2x1_1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -5273,9 +5741,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs1, vs10 // a0_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_2', ` +#else .macro KERNEL2x1_2 +#endif lxsspx vs0, o0, AO // load a0_r @@ -5307,9 +5783,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs5, vs18 // a4_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_E2', ` +#else .macro KERNEL2x1_E2 +#endif xsmaddadp vs32, vs4, vs16 // a4_r*b0_r @@ -5323,9 +5807,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs5, vs18 // a4_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUBI1', ` +#else .macro KERNEL2x1_SUBI1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -5357,9 +5849,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs39, vs1, vs10 // a0_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUB1', ` +#else .macro KERNEL2x1_SUB1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -5391,9 +5891,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs1, vs10 // a0_i*b1_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x1', ` +#else .macro SAVE2x1 +#endif mr T1, CO @@ -5482,14 +5990,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x8_1', ` +#else .macro LOAD1x8_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5514,9 +6030,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_I1', ` +#else .macro KERNEL1x8_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -5553,9 +6077,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs3, vs9 // a3_r*b0_i, a3_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_1', ` +#else .macro KERNEL1x8_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -5592,9 +6124,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs3, vs9 // a3_r*b0_i, a3_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_2', ` +#else .macro KERNEL1x8_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5631,9 +6171,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs7, vs17 // a7_r*b0_i, a7_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_E2', ` +#else .macro KERNEL1x8_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -5646,9 +6194,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs39, vs7, vs17 // a7_r*b0_i, a7_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUBI1', ` +#else .macro KERNEL1x8_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5685,9 +6241,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs3, vs9 // a3_r*b0_i, a3_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUB1', ` +#else .macro KERNEL1x8_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -5724,9 +6288,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs3, vs9 // a3_r*b0_i, a3_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x8', ` +#else .macro SAVE1x8 +#endif mr T1, CO @@ -6001,14 +6573,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x4_1', ` +#else .macro LOAD1x4_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -6029,9 +6609,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_I1', ` +#else .macro KERNEL1x4_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -6060,9 +6648,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs35, vs1, vs9 // a1_r*b0_i, a1_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_1', ` +#else .macro KERNEL1x4_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -6091,9 +6687,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs1, vs9 // a1_r*b0_i, a1_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_2', ` +#else .macro KERNEL1x4_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -6122,9 +6726,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs5, vs17 // a5_r*b0_i, a5_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_E2', ` +#else .macro KERNEL1x4_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r @@ -6133,9 +6745,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs5, vs17 // a5_r*b0_i, a5_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUBI1', ` +#else .macro KERNEL1x4_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -6164,9 +6784,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs1, vs9 // a1_r*b0_i, a1_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUB1', ` +#else .macro KERNEL1x4_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -6195,9 +6823,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs1, vs9 // a1_r*b0_i, a1_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x4', ` +#else .macro SAVE1x4 +#endif mr T1, CO @@ -6340,14 +6976,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x2_1', ` +#else .macro LOAD1x2_1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -6366,9 +7010,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_I1', ` +#else .macro KERNEL1x2_I1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -6393,9 +7045,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs33, vs0, vs9 // a0_r*b0_i, a0_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_1', ` +#else .macro KERNEL1x2_1 +#endif lxvw4x vs4, o0, AO // load a0, a1 @@ -6420,9 +7080,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs0, vs9 // a0_r*b0_i, a0_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_2', ` +#else .macro KERNEL1x2_2 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -6447,18 +7115,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs4, vs17 // a4_r*b0_i, a4_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_E2', ` +#else .macro KERNEL1x2_E2 +#endif xvmaddasp vs32, vs4, vs16 // a4_r*b0_r, a4_i*b0_r, a1_r*b0_r, a1_i*b0_r xvmaddasp vs33, vs4, vs17 // a4_r*b0_i, a4_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUBI1', ` +#else .macro KERNEL1x2_SUBI1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -6483,9 +7167,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs33, vs0, vs9 // a0_r*b0_i, a0_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUB1', ` +#else .macro KERNEL1x2_SUB1 +#endif lxvw4x vs0, o0, AO // load a0, a1 @@ -6510,9 +7202,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs0, vs9 // a0_r*b0_i, a0_i*b0_i, a1_r*b0_i, a1_i*b0_i +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x2', ` +#else .macro SAVE1x2 +#endif mr T1, CO @@ -6589,14 +7289,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x1_1', ` +#else .macro LOAD1x1_1 +#endif lxsspx vs0, o0, AO // load a0_r lxsspx vs1, o4, AO // load a0_i @@ -6610,9 +7318,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_I1', ` +#else .macro KERNEL1x1_I1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -6634,9 +7350,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs35, vs1, vs8 // a0_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_1', ` +#else .macro KERNEL1x1_1 +#endif lxsspx vs4, o0, AO // load a0_r @@ -6658,9 +7382,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs1, vs8 // a0_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_2', ` +#else .macro KERNEL1x1_2 +#endif lxsspx vs0, o0, AO // load a0_r @@ -6682,9 +7414,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmaddadp vs35, vs5, vs16 // a4_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_E2', ` +#else .macro KERNEL1x1_E2 +#endif xsmaddadp vs32, vs4, vs16 // a4_r*b0_r @@ -6693,9 +7433,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs5, vs16 // a4_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUBI1', ` +#else .macro KERNEL1x1_SUBI1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -6717,9 +7465,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs35, vs1, vs8 // a0_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUB1', ` +#else .macro KERNEL1x1_SUB1 +#endif lxsspx vs0, o0, AO // load a0_r @@ -6741,9 +7497,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs1, vs8 // a0_i*b0_r +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x1', ` +#else .macro SAVE1x1 +#endif mr T1, CO @@ -6790,5 +7554,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/dasum_microk_power8.c b/kernel/power/dasum_microk_power8.c index 880d7d271..4652fc57c 100644 --- a/kernel/power/dasum_microk_power8.c +++ b/kernel/power/dasum_microk_power8.c @@ -68,10 +68,10 @@ static double dasum_kernel_16 (long n, double *x) "addi %2, %2, 128 \n\t" "addic. 
%1, %1, -16 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvabsdp 48, 40 \n\t" "xvabsdp 49, 41 \n\t" @@ -108,9 +108,9 @@ static double dasum_kernel_16 (long n, double *x) "xvadddp 38, 38, %x5 \n\t" "xvadddp 39, 39, %x6 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvabsdp 48, 40 \n\t" "xvabsdp 49, 41 \n\t" @@ -140,7 +140,7 @@ static double dasum_kernel_16 (long n, double *x) "xvadddp 32, 32, 36 \n\t" - "xxswapd 33, 32 \n\t" + XXSWAPD_S(33,32) "xsadddp %x0, 32, 33 \n" "#n=%1 x=%3=%2 sum=%0 o16=%8 o32=%9 o48=%10 o64=%11 o80=%12 o96=%13 o112=%14\n" diff --git a/kernel/power/daxpy_microk_power8.c b/kernel/power/daxpy_microk_power8.c index fb714a3f9..a92026e83 100644 --- a/kernel/power/daxpy_microk_power8.c +++ b/kernel/power/daxpy_microk_power8.c @@ -58,7 +58,7 @@ static void daxpy_kernel_8 (long n, double *x, double *y, double alpha) __asm__ ( - "xxspltd %x4, %x22, 0 \n\t" + XXSPLTD_S(%x4,%x22,0) "dcbt 0, %2 \n\t" "dcbt 0, %3 \n\t" @@ -90,10 +90,10 @@ static void daxpy_kernel_8 (long n, double *x, double *y, double alpha) "addi %3, %3, -64 \n\t" "addic. %1, %1, -16 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" ".align 5 \n" - "1: \n\t" + "one%=: \n\t" "xvmaddadp %x13, %x5, %x4 \n\t" "xvmaddadp %x14, %x6, %x4 \n\t" @@ -152,9 +152,9 @@ static void daxpy_kernel_8 (long n, double *x, double *y, double alpha) "addi %3, %3, -64 \n\t" "addic. %1, %1, -16 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvmaddadp %x13, %x5, %x4 \n\t" "xvmaddadp %x14, %x6, %x4 \n\t" diff --git a/kernel/power/dcopy_microk_power8.c b/kernel/power/dcopy_microk_power8.c index 261dc04de..b51a21d08 100644 --- a/kernel/power/dcopy_microk_power8.c +++ b/kernel/power/dcopy_microk_power8.c @@ -62,10 +62,10 @@ static void dcopy_kernel_32 (long n, double *x, double *y) "addi %2, %2, 128 \n\t" "addic. 
%1, %1, -32 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "stxvd2x 32, 0, %3 \n\t" "stxvd2x 33, %5, %3 \n\t" @@ -108,9 +108,9 @@ static void dcopy_kernel_32 (long n, double *x, double *y) "addi %2, %2, 128 \n\t" "addic. %1, %1, -32 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "stxvd2x 32, 0, %3 \n\t" "stxvd2x 33, %5, %3 \n\t" diff --git a/kernel/power/ddot_microk_power8.c b/kernel/power/ddot_microk_power8.c index 4e6bc29c9..d2518ef7e 100644 --- a/kernel/power/ddot_microk_power8.c +++ b/kernel/power/ddot_microk_power8.c @@ -78,10 +78,10 @@ static double ddot_kernel_8 (long n, double *x, double *y) "addi %3, %3, 128 \n\t" "addic. %1, %1, -16 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvmaddadp 32, 40, 48 \n\t" "lxvd2x 40, 0, %2 \n\t" @@ -112,9 +112,9 @@ static double ddot_kernel_8 (long n, double *x, double *y) "addi %3, %3, 128 \n\t" "addic. %1, %1, -16 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvmaddadp 32, 40, 48 \n\t" "xvmaddadp 33, 41, 49 \n\t" @@ -135,7 +135,7 @@ static double ddot_kernel_8 (long n, double *x, double *y) "xvadddp 32, 32, 36 \n\t" - "xxswapd 33, 32 \n\t" + XXSWAPD_S(33,32) "xsadddp %x0, 32, 33 \n" diff --git a/kernel/power/dgemm_macros_16x4_power8.S b/kernel/power/dgemm_macros_16x4_power8.S index 5be517f7c..782425fbd 100644 --- a/kernel/power/dgemm_macros_16x4_power8.S +++ b/kernel/power/dgemm_macros_16x4_power8.S @@ -37,7 +37,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=4, M=16 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD4x16_1', ` +#else .macro LOAD4x16_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -58,10 +62,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi AO, AO, 128 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_I1', ` +#else .macro KERNEL4x16_I1 +#endif xvmuldp vs32, vs0, vs24 xvmuldp vs33, vs1, vs24 @@ -125,11 +137,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 128 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_1', ` +#else .macro KERNEL4x16_1 +#endif xvmaddadp vs32, vs0, vs24 xvmaddadp vs33, vs1, vs24 @@ -194,9 +214,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 128 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_2', ` +#else .macro KERNEL4x16_2 +#endif xvmaddadp vs32, vs8, vs28 xvmaddadp vs33, vs9, vs28 @@ -260,9 +288,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 128 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_L1', ` +#else .macro KERNEL4x16_L1 +#endif xvmaddadp vs32, vs0, vs24 xvmaddadp vs33, vs1, vs24 @@ -326,9 +362,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 128 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_L2', ` +#else .macro KERNEL4x16_L2 +#endif xvmaddadp vs32, vs8, vs28 xvmaddadp vs33, vs9, vs28 @@ -392,10 +436,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs63, vs15, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_E2', ` +#else .macro KERNEL4x16_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -434,9 +486,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs62, vs14, vs31 xvmaddadp vs63, vs15, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_SUBI1', ` +#else .macro KERNEL4x16_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -495,9 +555,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs62, vs6, vs27 xvmuldp vs63, vs7, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_SUB1', ` +#else .macro KERNEL4x16_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -555,9 +623,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs62, vs6, vs27 xvmaddadp vs63, vs7, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x16', ` +#else .macro SAVE4x16 +#endif add T2, CO, LDC @@ -680,13 +756,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs39, o112, T4 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=8 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD4x8_1', ` +#else .macro LOAD4x8_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -703,9 +787,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_I1', ` +#else .macro KERNEL4x8_I1 +#endif xvmuldp vs32, vs0, vs24 xvmuldp vs33, vs1, vs24 @@ -744,9 +836,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_1', ` +#else .macro KERNEL4x8_1 +#endif xvmaddadp vs32, vs0, vs24 xvmaddadp vs33, vs1, vs24 @@ -784,9 +884,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_2', ` +#else .macro KERNEL4x8_2 +#endif xvmaddadp vs32, vs8, vs28 xvmaddadp vs33, vs9, vs28 @@ -824,9 +932,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_E2', ` +#else .macro KERNEL4x8_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -849,9 +965,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs58, vs10, vs31 xvmaddadp vs59, vs11, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUBI1', ` +#else .macro KERNEL4x8_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -887,9 +1011,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs58, vs2, vs27 xvmuldp vs59, vs3, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUB1', ` +#else .macro KERNEL4x8_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -925,9 +1057,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs58, vs2, vs27 xvmaddadp vs59, vs3, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x8', ` +#else .macro SAVE4x8 +#endif mr T1, CO @@ -1035,13 +1175,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=4 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD4x4_1', ` +#else .macro LOAD4x4_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1054,9 +1202,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi AO, AO, 32 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_I1', ` +#else .macro KERNEL4x4_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -1082,9 +1238,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs56, vs0, vs27 xvmuldp vs57, vs1, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_1', ` +#else .macro KERNEL4x4_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -1110,9 +1274,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs0, vs27 xvmaddadp vs57, vs1, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_2', ` +#else .macro KERNEL4x4_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1138,9 +1310,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs8, vs31 xvmaddadp vs57, vs9, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_E2', ` +#else .macro KERNEL4x4_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -1155,9 +1335,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs8, vs31 xvmaddadp vs57, vs9, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUBI1', ` +#else .macro KERNEL4x4_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1183,9 +1371,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs56, vs0, vs27 xvmuldp vs57, vs1, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUB1', ` +#else .macro KERNEL4x4_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1211,9 +1407,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs56, vs0, vs27 xvmaddadp vs57, vs1, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x4', ` +#else .macro SAVE4x4 +#endif mr T1, CO @@ -1289,13 +1493,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=2 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD4x2_1', ` +#else .macro LOAD4x2_1 +#endif lxvd2x vs0, 0, AO @@ -1307,9 +1519,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 16 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_I1', ` +#else .macro KERNEL4x2_I1 +#endif lxvd2x vs8, 0, AO @@ -1330,9 +1550,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_1', ` +#else .macro KERNEL4x2_1 +#endif lxvd2x vs8, 0, AO @@ -1353,9 +1581,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_2', ` +#else .macro KERNEL4x2_2 +#endif lxvd2x vs0, 0, AO @@ -1376,9 +1612,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs8, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_E2', ` +#else .macro KERNEL4x2_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -1389,9 +1633,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs56, vs8, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUBI1', ` +#else .macro KERNEL4x2_SUBI1 +#endif lxvd2x vs0, 0, AO @@ -1412,9 +1664,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUB1', ` +#else .macro KERNEL4x2_SUB1 +#endif lxvd2x vs0, 0, AO @@ -1435,9 +1695,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x2', ` +#else .macro SAVE4x2 +#endif mr T1, CO @@ -1497,13 +1765,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=1 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD4x1_1', ` +#else .macro LOAD4x1_1 +#endif lxsdx vs0, 0, AO @@ -1515,9 +1791,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 8 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_I1', ` +#else .macro KERNEL4x1_I1 +#endif lxsdx vs8, 0, AO @@ -1538,9 +1822,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_1', ` +#else .macro KERNEL4x1_1 +#endif lxsdx vs8, 0, AO @@ -1561,9 +1853,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_2', ` +#else .macro KERNEL4x1_2 +#endif lxsdx vs0, 0, AO @@ -1584,9 +1884,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmaddadp vs56, vs8, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_E2', ` +#else .macro KERNEL4x1_E2 +#endif xsmaddadp vs32, vs8, vs28 @@ -1597,9 +1905,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs56, vs8, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUBI1', ` +#else .macro KERNEL4x1_SUBI1 +#endif lxsdx vs0, 0, AO @@ -1620,9 +1936,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUB1', ` +#else .macro KERNEL4x1_SUB1 +#endif lxsdx vs0, 0, AO @@ -1643,9 +1967,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x1', ` +#else .macro SAVE4x1 +#endif mr T1, CO @@ -1705,13 +2037,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=2, M=16 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD2x16_1', ` +#else .macro LOAD2x16_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1731,9 +2071,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_I1', ` +#else .macro KERNEL2x16_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -1772,9 +2120,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmuldp vs46, vs6, vs25 xvmuldp vs47, vs7, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_1', ` +#else .macro KERNEL2x16_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -1813,9 +2169,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs46, vs6, vs25 xvmaddadp vs47, vs7, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_2', ` +#else .macro KERNEL2x16_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1854,9 +2218,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs46, vs14, vs29 xvmaddadp vs47, vs15, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_E2', ` +#else .macro KERNEL2x16_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -1877,9 +2249,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs46, vs14, vs29 xvmaddadp vs47, vs15, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_SUBI1', ` +#else .macro KERNEL2x16_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1918,9 +2298,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs46, vs6, vs25 xvmuldp vs47, vs7, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_SUB1', ` +#else .macro KERNEL2x16_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1959,9 +2347,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs46, vs6, vs25 xvmaddadp vs47, vs7, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x16', ` +#else .macro SAVE2x16 +#endif mr T1, CO addi T2, T1, 64 @@ -2055,13 +2451,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=8 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD2x8_1', ` +#else .macro LOAD2x8_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2074,9 +2478,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_I1', ` +#else .macro KERNEL2x8_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2100,9 +2512,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs42, vs2, vs25 xvmuldp vs43, vs3, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_1', ` +#else .macro KERNEL2x8_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2126,9 +2546,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs42, vs2, vs25 xvmaddadp vs43, vs3, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_2', ` +#else .macro KERNEL2x8_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2152,9 +2580,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs42, vs10, vs29 xvmaddadp vs43, vs11, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_E2', ` +#else .macro KERNEL2x8_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -2167,9 +2603,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs42, vs10, vs29 xvmaddadp vs43, vs11, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUBI1', ` +#else .macro KERNEL2x8_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2193,9 +2637,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmuldp vs42, vs2, vs25 xvmuldp vs43, vs3, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUB1', ` +#else .macro KERNEL2x8_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2219,9 +2671,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs42, vs2, vs25 xvmaddadp vs43, vs3, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x8', ` +#else .macro SAVE2x8 +#endif mr T1, CO @@ -2277,13 +2737,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=2, M=4 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD2x4_1', ` +#else .macro LOAD2x4_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2294,9 +2762,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 32 addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_I1', ` +#else .macro KERNEL2x4_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2314,9 +2790,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs40, vs0, vs25 xvmuldp vs41, vs1, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_1', ` +#else .macro KERNEL2x4_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2334,9 +2818,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs0, vs25 xvmaddadp vs41, vs1, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_2', ` +#else .macro KERNEL2x4_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2354,9 +2846,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs40, vs8, vs29 xvmaddadp vs41, vs9, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_E2', ` +#else .macro KERNEL2x4_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -2365,9 +2865,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs8, vs29 xvmaddadp vs41, vs9, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUBI1', ` +#else .macro KERNEL2x4_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2385,9 +2893,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs40, vs0, vs25 xvmuldp vs41, vs1, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUB1', ` +#else .macro KERNEL2x4_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2405,9 +2921,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs0, vs25 xvmaddadp vs41, vs1, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x4', ` +#else .macro SAVE2x4 +#endif mr T1, CO @@ -2447,13 +2971,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=2, M=2 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD2x2_1', ` +#else .macro LOAD2x2_1 +#endif lxvd2x vs0, 0, AO @@ -2463,9 +2995,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 16 addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_I1', ` +#else .macro KERNEL2x2_I1 +#endif lxvd2x vs8, 0, AO @@ -2480,9 +3020,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmuldp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_1', ` +#else .macro KERNEL2x2_1 +#endif lxvd2x vs8, 0, AO @@ -2497,9 +3045,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_2', ` +#else .macro KERNEL2x2_2 +#endif lxvd2x vs0, 0, AO @@ -2514,18 +3070,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs8, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_E2', ` +#else .macro KERNEL2x2_E2 +#endif xvmaddadp vs32, vs8, vs28 xvmaddadp vs40, vs8, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUBI1', ` +#else .macro KERNEL2x2_SUBI1 +#endif lxvd2x vs0, 0, AO @@ -2540,9 +3112,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUB1', ` +#else .macro KERNEL2x2_SUB1 +#endif lxvd2x vs0, 0, AO @@ -2557,9 +3137,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x2', ` +#else .macro SAVE2x2 +#endif mr T1, CO @@ -2591,13 +3179,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=2, M=1 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD2x1_1', ` +#else .macro LOAD2x1_1 +#endif lxsdx vs0, 0, AO @@ -2607,9 +3203,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi AO, AO, 8 addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_I1', ` +#else .macro KERNEL2x1_I1 +#endif lxsdx vs8, 0, AO @@ -2624,9 +3228,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_1', ` +#else .macro KERNEL2x1_1 +#endif lxsdx vs8, 0, AO @@ -2641,9 +3253,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_2', ` +#else .macro KERNEL2x1_2 +#endif lxsdx vs0, 0, AO @@ -2658,18 +3278,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs40, vs8, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_E2', ` +#else .macro KERNEL2x1_E2 +#endif xsmaddadp vs32, vs8, vs28 xsmaddadp vs40, vs8, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUBI1', ` +#else .macro KERNEL2x1_SUBI1 +#endif lxsdx vs0, 0, AO @@ -2684,9 +3320,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUB1', ` +#else .macro KERNEL2x1_SUB1 +#endif lxsdx vs0, 0, AO @@ -2701,9 +3345,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x1', ` +#else .macro SAVE2x1 +#endif mr T1, CO @@ -2735,13 +3387,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=1, M=16 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD1x16_1', ` +#else .macro LOAD1x16_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2760,9 +3420,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_I1', ` +#else .macro KERNEL1x16_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2791,9 +3459,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs38, vs6, vs24 xvmuldp vs39, vs7, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_1', ` +#else .macro KERNEL1x16_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2822,9 +3498,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs38, vs6, vs24 xvmaddadp vs39, vs7, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_2', ` +#else .macro KERNEL1x16_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2853,9 +3537,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs38, vs14, vs28 xvmaddadp vs39, vs15, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_E2', ` +#else .macro KERNEL1x16_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -2867,9 +3559,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs38, vs14, vs28 xvmaddadp vs39, vs15, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_SUBI1', ` +#else .macro KERNEL1x16_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2898,9 +3598,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmuldp vs38, vs6, vs24 xvmuldp vs39, vs7, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_SUB1', ` +#else .macro KERNEL1x16_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2929,9 +3637,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs38, vs6, vs24 xvmaddadp vs39, vs7, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x16', ` +#else .macro SAVE1x16 +#endif mr T1, CO addi T2, T1, 64 @@ -2980,13 +3696,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=8 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD1x8_1', ` +#else .macro LOAD1x8_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2998,9 +3722,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_I1', ` +#else .macro KERNEL1x8_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -3018,9 +3750,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs34, vs2, vs24 xvmuldp vs35, vs3, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_1', ` +#else .macro KERNEL1x8_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -3038,9 +3778,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs34, vs2, vs24 xvmaddadp vs35, vs3, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_2', ` +#else .macro KERNEL1x8_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3058,9 +3806,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs34, vs10, vs28 xvmaddadp vs35, vs11, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_E2', ` +#else .macro KERNEL1x8_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -3068,9 +3824,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs34, vs10, vs28 xvmaddadp vs35, vs11, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUBI1', ` +#else .macro KERNEL1x8_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3088,9 +3852,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs34, vs2, vs24 xvmuldp vs35, vs3, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUB1', ` +#else .macro KERNEL1x8_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3108,9 +3880,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs34, vs2, vs24 xvmaddadp vs35, vs3, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x8', ` +#else .macro SAVE1x8 +#endif mr T1, CO @@ -3140,13 +3920,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=1, M=4 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD1x4_1', ` +#else .macro LOAD1x4_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3156,9 +3944,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 32 addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_I1', ` +#else .macro KERNEL1x4_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -3172,9 +3968,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmuldp vs32, vs0, vs24 xvmuldp vs33, vs1, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_1', ` +#else .macro KERNEL1x4_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -3188,9 +3992,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs32, vs0, vs24 xvmaddadp vs33, vs1, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_2', ` +#else .macro KERNEL1x4_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3204,17 +4016,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs32, vs8, vs28 xvmaddadp vs33, vs9, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_E2', ` +#else .macro KERNEL1x4_E2 +#endif xvmaddadp vs32, vs8, vs28 xvmaddadp vs33, vs9, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUBI1', ` +#else .macro KERNEL1x4_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3228,9 +4056,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs32, vs0, vs24 xvmuldp vs33, vs1, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUB1', ` +#else .macro KERNEL1x4_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3244,9 +4080,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs32, vs0, vs24 xvmaddadp vs33, vs1, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x4', ` +#else .macro SAVE1x4 +#endif mr T1, CO @@ -3268,13 +4112,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=1, M=2 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD1x2_1', ` +#else .macro LOAD1x2_1 +#endif lxvd2x vs0, 0, AO @@ -3283,9 +4135,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 16 addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_I1', ` +#else .macro KERNEL1x2_I1 +#endif lxvd2x vs8, 0, AO @@ -3297,9 +4157,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_1', ` +#else .macro KERNEL1x2_1 +#endif lxvd2x vs8, 0, AO @@ -3311,9 +4179,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_2', ` +#else .macro KERNEL1x2_2 +#endif lxvd2x vs0, 0, AO @@ -3325,16 +4201,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs32, vs8, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_E2', ` +#else .macro KERNEL1x2_E2 +#endif xvmaddadp vs32, vs8, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUBI1', ` +#else .macro KERNEL1x2_SUBI1 +#endif lxvd2x vs0, 0, AO @@ -3346,9 +4238,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUB1', ` +#else .macro KERNEL1x2_SUB1 +#endif lxvd2x vs0, 0, AO @@ -3360,9 +4260,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x2', ` +#else .macro SAVE1x2 +#endif mr T1, CO @@ -3380,13 +4288,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=1, M=1 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD1x1_1', ` +#else .macro LOAD1x1_1 +#endif lxsdx vs0, 0, AO @@ -3395,9 +4311,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 8 addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_I1', ` +#else .macro KERNEL1x1_I1 +#endif lxsdx vs8, 0, AO @@ -3409,9 +4333,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_1', ` +#else .macro KERNEL1x1_1 +#endif lxsdx vs8, 0, AO @@ -3423,9 +4355,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_2', ` +#else .macro KERNEL1x1_2 +#endif lxsdx vs0, 0, AO @@ -3437,16 +4377,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs32, vs8, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_E2', ` +#else .macro KERNEL1x1_E2 +#endif xsmaddadp vs32, vs8, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUBI1', ` +#else .macro KERNEL1x1_SUBI1 +#endif lxsdx vs0, 0, AO @@ -3458,9 +4414,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUB1', ` +#else .macro KERNEL1x1_SUB1 +#endif lxsdx vs0, 0, AO @@ -3472,9 +4436,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x1', ` +#else .macro SAVE1x1 +#endif mr T1, CO @@ -3492,5 +4464,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/dgemm_ncopy_macros_4_power8.S b/kernel/power/dgemm_ncopy_macros_4_power8.S index 8d6744b91..33d02c77d 100644 --- a/kernel/power/dgemm_ncopy_macros_4_power8.S +++ b/kernel/power/dgemm_ncopy_macros_4_power8.S @@ -38,7 +38,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=4 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x16', ` +#else .macro COPY_4x16 +#endif lxvd2x vs0, o0, A0 lxvd2x vs1, o0, A1 @@ -180,14 +184,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x8', ` +#else .macro COPY_4x8 +#endif lxvd2x vs0, o0, A0 lxvd2x vs1, o16, A0 @@ -259,14 +271,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x4', ` +#else .macro COPY_4x4 +#endif lxvd2x vs0, o0, A0 lxvd2x vs1, o16, A0 @@ -310,14 +330,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x2', ` +#else .macro COPY_4x2 +#endif lxvd2x vs0, o0, A0 addi A0, A0, 16 @@ -348,14 +376,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x1', ` +#else .macro COPY_4x1 +#endif lxsdx vs0, o0, A0 addi A0, A0, 8 @@ -382,14 +418,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x16', ` +#else .macro COPY_2x16 +#endif lxvd2x vs0, o0, A0 lxvd2x vs1, o16, A0 @@ -459,14 +503,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x8', ` +#else .macro COPY_2x8 +#endif lxvd2x vs0, o0, A0 lxvd2x vs1, o16, A0 @@ -506,14 +558,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x4', ` +#else .macro COPY_2x4 +#endif lxvd2x vs0, o0, A0 lxvd2x vs1, o16, A0 @@ -539,14 +599,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x2', ` +#else .macro COPY_2x2 +#endif lxvd2x vs0, o0, A0 addi A0, A0, 16 @@ -565,14 +633,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x1', ` +#else .macro COPY_2x1 +#endif lxsdx vs0, o0, A0 addi A0, A0, 8 @@ -589,14 +665,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x16', ` +#else .macro COPY_1x16 +#endif lxvd2x vs0, o0, A0 lxvd2x vs1, o16, A0 @@ -622,14 +706,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x8', ` +#else .macro COPY_1x8 +#endif lxvd2x vs0, o0, A0 lxvd2x vs1, o16, A0 @@ -645,14 +737,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x4', ` +#else .macro COPY_1x4 +#endif lxvd2x vs0, o0, A0 lxvd2x vs1, o16, A0 @@ -664,14 +764,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x2', ` +#else .macro COPY_1x2 +#endif lxvd2x vs0, o0, A0 addi A0, A0, 16 @@ -681,14 +789,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x1', ` +#else .macro COPY_1x1 +#endif lxsdx vs0, o0, A0 addi A0, A0, 8 @@ -698,5 +814,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/dgemm_tcopy_macros_16_power8.S b/kernel/power/dgemm_tcopy_macros_16_power8.S index 68e53bcf2..6c5b8ed62 100644 --- a/kernel/power/dgemm_tcopy_macros_16_power8.S +++ b/kernel/power/dgemm_tcopy_macros_16_power8.S @@ -38,7 +38,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=4 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x16', ` +#else .macro COPY_4x16 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -140,14 +144,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs10, o32, T1 stxvd2x vs11, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x8', ` +#else .macro COPY_4x8 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -205,14 +217,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvd2x vs46, o32, T1 stxvd2x vs47, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x4', ` +#else .macro COPY_4x4 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -250,14 +270,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs38, o32, T1 stxvd2x vs39, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x2', ` +#else .macro COPY_4x2 +#endif lxvd2x vs32, o0, A0 addi A0, A0, 16 @@ -285,14 +313,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x1', ` +#else .macro COPY_4x1 +#endif lxsdx vs32, o0, A0 addi A0, A0, 8 @@ -322,14 +358,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsdx vs35, o8, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x16', ` +#else .macro COPY_2x16 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -383,14 +427,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvd2x vs46, o32, T1 stxvd2x vs47, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x8', ` +#else .macro COPY_2x8 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -420,14 +472,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs38, o32, T1 stxvd2x vs39, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x4', ` +#else .macro COPY_2x4 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -447,14 +507,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs34, o32, T1 stxvd2x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x2', ` +#else .macro COPY_2x2 +#endif lxvd2x vs32, o0, A0 addi A0, A0, 16 @@ -470,14 +538,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvd2x vs33, o16, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x1', ` +#else .macro COPY_2x1 +#endif lxsdx vs32, o0, A0 addi A0, A0, 8 @@ -493,14 +569,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsdx vs33, o8, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x16', ` +#else .macro COPY_1x16 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -528,14 +612,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs38, o32, T1 stxvd2x vs39, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x8', ` +#else .macro COPY_1x8 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -551,14 +643,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvd2x vs34, o32, T1 stxvd2x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x4', ` +#else .macro COPY_1x4 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -570,14 +670,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs32, o0, T1 stxvd2x vs33, o16, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x2', ` +#else .macro COPY_1x2 +#endif lxvd2x vs32, o0, A0 addi A0, A0, 16 @@ -587,14 +695,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs32, o0, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x1', ` +#else .macro COPY_1x1 +#endif lxsdx vs32, o0, A0 addi A0, A0, 8 @@ -604,5 +720,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxsdx vs32, o0, T1 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/dgemv_n_microk_power8.c b/kernel/power/dgemv_n_microk_power8.c index ae4fe9009..c2eb3968c 100644 --- a/kernel/power/dgemv_n_microk_power8.c +++ b/kernel/power/dgemv_n_microk_power8.c @@ -46,7 +46,7 @@ static void dgemv_kernel_4x4 (long n, double *ap, long lda, double *x, double *y ( "lxvd2x 34, 0, %10 \n\t" // x0, x1 "lxvd2x 35, %11, %10 \n\t" // x2, x3 - "xxspltd 32, %x9, 0 \n\t" // alpha, alpha + XXSPLTD_S(32,%x9,0) // alpha, alpha "sldi %6, %13, 3 \n\t" // lda * sizeof (double) @@ -56,10 +56,10 @@ static void dgemv_kernel_4x4 (long n, double *ap, long lda, double *x, double *y "add %4, %3, %6 \n\t" // a0 = ap, a1 = a0 + lda "add %6, %6, %6 \n\t" // 2 * lda - "xxspltd 32, 34, 0 \n\t" // x0 * alpha, x0 * alpha - "xxspltd 33, 34, 1 \n\t" // x1 * alpha, x1 * alpha - "xxspltd 34, 35, 0 \n\t" // x2 * alpha, x2 * alpha - "xxspltd 35, 35, 1 \n\t" // x3 * alpha, x3 * alpha + XXSPLTD_S(32,34,0) // x0 * alpha, x0 * alpha + XXSPLTD_S(33,34,1) // x1 * alpha, x1 * alpha + XXSPLTD_S(34,35,0) // x2 * alpha, x2 * alpha + XXSPLTD_S(35,35,1) // x3 * alpha, x3 * alpha "add %5, %3, %6 \n\t" // a2 = a0 + 2 * lda "add %6, %4, %6 \n\t" // a3 = a1 + 2 * lda @@ -89,10 +89,10 @@ static void dgemv_kernel_4x4 (long n, double *ap, long lda, double *x, double *y "addi %6, %6, 32 \n\t" "addic. %1, %1, -4 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "lxvd2x 36, 0, %2 \n\t" // y0, y1 "lxvd2x 37, %11, %2 \n\t" // y2, y3 @@ -131,7 +131,7 @@ static void dgemv_kernel_4x4 (long n, double *ap, long lda, double *x, double *y "addi %2, %2, 32 \n\t" "addic. %1, %1, -4 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" "lxvd2x 36, 0, %2 \n\t" // y0, y1 @@ -171,7 +171,7 @@ static void dgemv_kernel_4x4 (long n, double *ap, long lda, double *x, double *y "addi %2, %2, 32 \n\t" "addic. 
%1, %1, -4 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" "lxvd2x 36, 0, %2 \n\t" // y0, y1 @@ -211,7 +211,7 @@ static void dgemv_kernel_4x4 (long n, double *ap, long lda, double *x, double *y "addi %2, %2, 32 \n\t" "addic. %1, %1, -4 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" "lxvd2x 36, 0, %2 \n\t" // y0, y1 @@ -251,9 +251,9 @@ static void dgemv_kernel_4x4 (long n, double *ap, long lda, double *x, double *y "addi %2, %2, 32 \n\t" "addic. %1, %1, -4 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "lxvd2x 36, 0, %2 \n\t" // y0, y1 "lxvd2x 37, %11, %2 \n\t" // y2, y3 diff --git a/kernel/power/dgemv_t.c b/kernel/power/dgemv_t.c index b8589a131..ffe469d4d 100644 --- a/kernel/power/dgemv_t.c +++ b/kernel/power/dgemv_t.c @@ -93,11 +93,11 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, double *ap, double *x, do "li %[off],32 \n\t" - "ble- 2f \n\t" + "ble- two%= \n\t" //-------------------------------------------------- - ".p2align 5 \n\t" - "1: \n\t" + ".align 5 \n\t" + "one%=: \n\t" "xvmaddadp 34,36,32 \n\t" "xvmaddadp 35,38,32 \n\t" "addi %[off2], %[off2],32 \n\t" @@ -137,7 +137,7 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, double *ap, double *x, do "lxvd2x 49, %[a6], %[off2] \n\t" "lxvd2x 51, %[a7], %[off2] \n\t" "lxvd2x 33, %[x], %[off2] \n\t" - "ble- 2f \n\t" + "ble- two%= \n\t" "xvmaddadp 34,36,32 \n\t" "xvmaddadp 35,38,32 \n\t" "addi %[off2], %[off2],32 \n\t" @@ -177,7 +177,7 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, double *ap, double *x, do "lxvd2x 49, %[a6], %[off2] \n\t" "lxvd2x 51, %[a7], %[off2] \n\t" "lxvd2x 33, %[x], %[off2] \n\t" - "ble- 2f \n\t" + "ble- two%= \n\t" "xvmaddadp 34,36,32 \n\t" "xvmaddadp 35,38,32 \n\t" #if defined(PREFETCH) @@ -229,7 +229,7 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, double *ap, double *x, do "lxvd2x 33, %[x], %[off2] \n\t" "addic. 
%[n],%[n],-4 \n\t" - "ble- 2f \n\t" + "ble- two%= \n\t" "addi %[off2], %[off2],32 \n\t" #if defined(PREFETCH) @@ -288,9 +288,9 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, double *ap, double *x, do #if defined(PREFETCH) "dcbt %[temp],%[x] \n\t" #endif - "bgt+ 1b \n\t" - ".p2align 5 \n\t" - "2: \n\t" + "bgt+ one%= \n\t" + ".align 5 \n\t" + "two%=: \n\t" //-------------------------------------------- "xvmaddadp 34,36,32 \n\t" @@ -301,7 +301,7 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, double *ap, double *x, do "xvmaddadp 7,46,32 \n\t" "xvmaddadp 8,48,32 \n\t" "xvmaddadp 9,50,32 \n\t" - "xxspltd 36, %x[alpha], 0 \n\t" + XXSPLTD_S(36,%x[alpha],0) "xvmaddadp 34,37,33 \n\t" "xvmaddadp 35,39,33 \n\t" "xvmaddadp 4,41,33 \n\t" @@ -322,21 +322,21 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, double *ap, double *x, do - "xxmrgld 42,34,35 \n\t" - "xxmrghd 43,34,35 \n\t" + XXMRGLD_S(42,34,35) + XXMRGHD_S(43,34,35) - "xxmrgld 44,4,5 \n\t" - "xxmrghd 45,4,5 \n\t" + XXMRGLD_S(44,4,5) + XXMRGHD_S(45,4,5) "xvadddp 42,42,43 \n\t" - "xxmrgld 46,6,7 \n\t" - "xxmrghd 47,6,7 \n\t" + XXMRGLD_S(46,6,7) + XXMRGHD_S(47,6,7) "xvadddp 44,44,45 \n\t" - "xxmrgld 48,8,9 \n\t" - "xxmrghd 49,8,9 \n\t" + XXMRGLD_S(48,8,9) + XXMRGHD_S(49,8,9) "xvadddp 46,46,47 \n\t" diff --git a/kernel/power/drot_microk_power8.c b/kernel/power/drot_microk_power8.c index 016b7764d..259c08187 100644 --- a/kernel/power/drot_microk_power8.c +++ b/kernel/power/drot_microk_power8.c @@ -51,8 +51,8 @@ static void drot_kernel_16 (long n, double *x, double *y, double c, double s) __asm__ ( - "xxspltd 36, %x13, 0 \n\t" // load c to both dwords - "xxspltd 37, %x14, 0 \n\t" // load s to both dwords + XXSPLTD_S(36,%x13,0) // load c to both dwords + XXSPLTD_S(37,%x14,0) // load s to both dwords "lxvd2x 32, 0, %3 \n\t" // load x "lxvd2x 33, %15, %3 \n\t" @@ -68,10 +68,10 @@ static void drot_kernel_16 (long n, double *x, double *y, double c, double s) "addi %4, %4, 64 \n\t" "addic. 
%2, %2, -8 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvmuldp 40, 32, 36 \n\t" // c * x "xvmuldp 41, 33, 36 \n\t" @@ -135,9 +135,9 @@ static void drot_kernel_16 (long n, double *x, double *y, double c, double s) "addi %4, %4, 128 \n\t" "addic. %2, %2, -8 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvmuldp 40, 32, 36 \n\t" // c * x "xvmuldp 41, 33, 36 \n\t" diff --git a/kernel/power/dscal_microk_power8.c b/kernel/power/dscal_microk_power8.c index 04898eb3d..e9bacd05a 100644 --- a/kernel/power/dscal_microk_power8.c +++ b/kernel/power/dscal_microk_power8.c @@ -41,7 +41,7 @@ static void dscal_kernel_8 (long n, double *x, double alpha) ( "dcbt 0, %2 \n\t" - "xxspltd %x3, %x3, 0 \n\t" + XXSPLTD_S(%x3,%x3,0) "lxvd2x 32, 0, %2 \n\t" "lxvd2x 33, %4, %2 \n\t" @@ -55,10 +55,10 @@ static void dscal_kernel_8 (long n, double *x, double alpha) "addi %2, %2, 128 \n\t" "addic. %1, %1, -16 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvmuldp 40, 32, %x3 \n\t" "xvmuldp 41, 33, %x3 \n\t" @@ -91,9 +91,9 @@ static void dscal_kernel_8 (long n, double *x, double alpha) "addi %2, %2, 256 \n\t" "addic. %1, %1, -16 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvmuldp 40, 32, %x3 \n\t" "xvmuldp 41, 33, %x3 \n\t" @@ -146,8 +146,8 @@ static void dscal_kernel_8_zero (long n, double *x) ( "xxlxor %x3, %x3, %x3 \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "stxvd2x %x3, 0, %2 \n\t" "stxvd2x %x3, %4, %2 \n\t" @@ -161,7 +161,7 @@ static void dscal_kernel_8_zero (long n, double *x) "addi %2, %2, 128 \n\t" "addic. 
%1, %1, -16 \n\t" - "bgt 1b \n" + "bgt one%= \n" "#n=%1 x=%0=%2 t0=%x3 o16=%4 o32=%5 o48=%6 o64=%7 o80=%8 o96=%9 o112=%10" : diff --git a/kernel/power/dswap_microk_power8.c b/kernel/power/dswap_microk_power8.c index 31eff3449..ecfd5c9f9 100644 --- a/kernel/power/dswap_microk_power8.c +++ b/kernel/power/dswap_microk_power8.c @@ -39,8 +39,8 @@ static void dswap_kernel_32 (long n, double *x, double *y) { __asm__ ( - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "lxvd2x 32, 0, %4 \n\t" "lxvd2x 33, %5, %4 \n\t" @@ -131,7 +131,7 @@ static void dswap_kernel_32 (long n, double *x, double *y) "addi %4, %4, 128 \n\t" "addic. %2, %2, -32 \n\t" - "bgt 1b \n" + "bgt one%= \n" "#n=%2 x=%0=%3 y=%1=%4 o16=%5 o32=%6 o48=%7 o64=%8 o80=%9 o96=%10 o112=%11" : diff --git a/kernel/power/dtrmm_macros_16x4_power8.S b/kernel/power/dtrmm_macros_16x4_power8.S index 079144a90..efb034594 100644 --- a/kernel/power/dtrmm_macros_16x4_power8.S +++ b/kernel/power/dtrmm_macros_16x4_power8.S @@ -37,7 +37,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=4, M=16 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD4x16_1', ` +#else .macro LOAD4x16_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -60,9 +64,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_I1', ` +#else .macro KERNEL4x16_I1 +#endif xvmuldp vs32, vs0, vs24 xvmuldp vs33, vs1, vs24 @@ -127,9 +139,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_1', ` +#else .macro KERNEL4x16_1 +#endif xvmaddadp vs32, vs0, vs24 xvmaddadp vs33, vs1, vs24 @@ -195,9 +215,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_2', ` +#else .macro KERNEL4x16_2 +#endif xvmaddadp vs32, vs8, vs28 xvmaddadp vs33, vs9, vs28 @@ -262,9 +290,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_E2', ` +#else .macro KERNEL4x16_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -303,9 +339,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs62, vs14, vs31 xvmaddadp vs63, vs15, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_SUBI1', ` +#else .macro KERNEL4x16_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -364,9 +408,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs62, vs6, vs27 xvmuldp vs63, vs7, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_SUB1', ` +#else .macro KERNEL4x16_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -425,9 +477,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs62, vs6, vs27 xvmaddadp vs63, vs7, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x16', ` +#else .macro SAVE4x16 +#endif mr T1, CO addi T2, T1, 64 @@ -615,13 +675,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=8 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD4x8_1', ` +#else .macro LOAD4x8_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -638,9 +706,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_I1', ` +#else .macro KERNEL4x8_I1 +#endif xvmuldp vs32, vs0, vs24 xvmuldp vs33, vs1, vs24 @@ -679,9 +755,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_1', ` +#else .macro KERNEL4x8_1 +#endif xvmaddadp vs32, vs0, vs24 xvmaddadp vs33, vs1, vs24 @@ -719,9 +803,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_2', ` +#else .macro KERNEL4x8_2 +#endif xvmaddadp vs32, vs8, vs28 xvmaddadp vs33, vs9, vs28 @@ -759,9 +851,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_E2', ` +#else .macro KERNEL4x8_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -784,9 +884,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs58, vs10, vs31 xvmaddadp vs59, vs11, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUBI1', ` +#else .macro KERNEL4x8_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -822,9 +930,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs58, vs2, vs27 xvmuldp vs59, vs3, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUB1', ` +#else .macro KERNEL4x8_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -860,9 +976,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs58, vs2, vs27 xvmaddadp vs59, vs3, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x8', ` +#else .macro SAVE4x8 +#endif mr T1, CO @@ -970,13 +1094,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=4 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD4x4_1', ` +#else .macro LOAD4x4_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -989,9 +1121,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 32 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_I1', ` +#else .macro KERNEL4x4_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -1017,9 +1157,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs56, vs0, vs27 xvmuldp vs57, vs1, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_1', ` +#else .macro KERNEL4x4_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -1045,9 +1193,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs0, vs27 xvmaddadp vs57, vs1, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_2', ` +#else .macro KERNEL4x4_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1073,9 +1229,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs8, vs31 xvmaddadp vs57, vs9, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_E2', ` +#else .macro KERNEL4x4_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -1090,9 +1254,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs56, vs8, vs31 xvmaddadp vs57, vs9, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUBI1', ` +#else .macro KERNEL4x4_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1118,9 +1290,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs56, vs0, vs27 xvmuldp vs57, vs1, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUB1', ` +#else .macro KERNEL4x4_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1146,9 +1326,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs0, vs27 xvmaddadp vs57, vs1, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x4', ` +#else .macro SAVE4x4 +#endif mr T1, CO @@ -1224,13 +1412,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=2 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD4x2_1', ` +#else .macro LOAD4x2_1 +#endif lxvd2x vs0, 0, AO @@ -1242,9 +1438,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 16 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_I1', ` +#else .macro KERNEL4x2_I1 +#endif lxvd2x vs8, 0, AO @@ -1265,9 +1469,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_1', ` +#else .macro KERNEL4x2_1 +#endif lxvd2x vs8, 0, AO @@ -1288,9 +1500,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_2', ` +#else .macro KERNEL4x2_2 +#endif lxvd2x vs0, 0, AO @@ -1311,9 +1531,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs8, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_E2', ` +#else .macro KERNEL4x2_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -1324,9 +1552,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs8, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUBI1', ` +#else .macro KERNEL4x2_SUBI1 +#endif lxvd2x vs0, 0, AO @@ -1347,9 +1583,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUB1', ` +#else .macro KERNEL4x2_SUB1 +#endif lxvd2x vs0, 0, AO @@ -1370,9 +1614,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x2', ` +#else .macro SAVE4x2 +#endif mr T1, CO @@ -1432,13 +1684,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=1 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD4x1_1', ` +#else .macro LOAD4x1_1 +#endif lxsdx vs0, 0, AO @@ -1450,9 +1710,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 8 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_I1', ` +#else .macro KERNEL4x1_I1 +#endif lxsdx vs8, 0, AO @@ -1473,9 +1741,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_1', ` +#else .macro KERNEL4x1_1 +#endif lxsdx vs8, 0, AO @@ -1496,9 +1772,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_2', ` +#else .macro KERNEL4x1_2 +#endif lxsdx vs0, 0, AO @@ -1519,9 +1803,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs56, vs8, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_E2', ` +#else .macro KERNEL4x1_E2 +#endif xsmaddadp vs32, vs8, vs28 @@ -1532,9 +1824,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs56, vs8, vs31 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUBI1', ` +#else .macro KERNEL4x1_SUBI1 +#endif lxsdx vs0, 0, AO @@ -1555,9 +1855,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUB1', ` +#else .macro KERNEL4x1_SUB1 +#endif lxsdx vs0, 0, AO @@ -1578,9 +1886,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs56, vs0, vs27 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x1', ` +#else .macro SAVE4x1 +#endif mr T1, CO @@ -1640,13 +1956,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=2, M=16 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD2x16_1', ` +#else .macro LOAD2x16_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1666,9 +1990,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_I1', ` +#else .macro KERNEL2x16_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -1707,9 +2039,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs46, vs6, vs25 xvmuldp vs47, vs7, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_1', ` +#else .macro KERNEL2x16_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -1748,9 +2088,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs46, vs6, vs25 xvmaddadp vs47, vs7, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_2', ` +#else .macro KERNEL2x16_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1789,9 +2137,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs46, vs14, vs29 xvmaddadp vs47, vs15, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_E2', ` +#else .macro KERNEL2x16_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -1812,9 +2168,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs46, vs14, vs29 xvmaddadp vs47, vs15, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_SUBI1', ` +#else .macro KERNEL2x16_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1853,9 +2217,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmuldp vs46, vs6, vs25 xvmuldp vs47, vs7, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_SUB1', ` +#else .macro KERNEL2x16_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -1894,9 +2266,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs46, vs6, vs25 xvmaddadp vs47, vs7, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x16', ` +#else .macro SAVE2x16 +#endif mr T1, CO addi T2, T1, 64 @@ -1990,13 +2370,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=8 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD2x8_1', ` +#else .macro LOAD2x8_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2009,9 +2397,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_I1', ` +#else .macro KERNEL2x8_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2035,9 +2431,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs42, vs2, vs25 xvmuldp vs43, vs3, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_1', ` +#else .macro KERNEL2x8_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2061,9 +2465,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs42, vs2, vs25 xvmaddadp vs43, vs3, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_2', ` +#else .macro KERNEL2x8_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2087,9 +2499,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs42, vs10, vs29 xvmaddadp vs43, vs11, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_E2', ` +#else .macro KERNEL2x8_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -2102,9 +2522,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs42, vs10, vs29 xvmaddadp vs43, vs11, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUBI1', ` +#else .macro KERNEL2x8_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2128,9 +2556,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs42, vs2, vs25 xvmuldp vs43, vs3, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUB1', ` +#else .macro KERNEL2x8_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2154,9 +2590,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs42, vs2, vs25 xvmaddadp vs43, vs3, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x8', ` +#else .macro SAVE2x8 +#endif mr T1, CO @@ -2212,13 +2656,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=2, M=4 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD2x4_1', ` +#else .macro LOAD2x4_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2229,9 +2681,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 32 addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_I1', ` +#else .macro KERNEL2x4_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2249,9 +2709,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmuldp vs40, vs0, vs25 xvmuldp vs41, vs1, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_1', ` +#else .macro KERNEL2x4_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2269,9 +2737,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs0, vs25 xvmaddadp vs41, vs1, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_2', ` +#else .macro KERNEL2x4_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2289,9 +2765,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs8, vs29 xvmaddadp vs41, vs9, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_E2', ` +#else .macro KERNEL2x4_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -2300,9 +2784,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs8, vs29 xvmaddadp vs41, vs9, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUBI1', ` +#else .macro KERNEL2x4_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2320,9 +2812,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs40, vs0, vs25 xvmuldp vs41, vs1, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUB1', ` +#else .macro KERNEL2x4_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2340,9 +2840,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs0, vs25 xvmaddadp vs41, vs1, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x4', ` +#else .macro SAVE2x4 +#endif mr T1, CO @@ -2382,13 +2890,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=2, M=2 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD2x2_1', ` +#else .macro LOAD2x2_1 +#endif lxvd2x vs0, 0, AO @@ -2398,9 +2914,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 16 addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_I1', ` +#else .macro KERNEL2x2_I1 +#endif lxvd2x vs8, 0, AO @@ -2415,9 +2939,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_1', ` +#else .macro KERNEL2x2_1 +#endif lxvd2x vs8, 0, AO @@ -2432,9 +2964,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_2', ` +#else .macro KERNEL2x2_2 +#endif lxvd2x vs0, 0, AO @@ -2449,18 +2989,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs40, vs8, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_E2', ` +#else .macro KERNEL2x2_E2 +#endif xvmaddadp vs32, vs8, vs28 xvmaddadp vs40, vs8, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUBI1', ` +#else .macro KERNEL2x2_SUBI1 +#endif lxvd2x vs0, 0, AO @@ -2475,9 +3031,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUB1', ` +#else .macro KERNEL2x2_SUB1 +#endif lxvd2x vs0, 0, AO @@ -2492,9 +3056,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x2', ` +#else .macro SAVE2x2 +#endif mr T1, CO @@ -2526,13 +3098,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=2, M=1 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD2x1_1', ` +#else .macro LOAD2x1_1 +#endif lxsdx vs0, 0, AO @@ -2542,9 +3122,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 8 addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_I1', ` +#else .macro KERNEL2x1_I1 +#endif lxsdx vs8, 0, AO @@ -2559,9 +3147,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_1', ` +#else .macro KERNEL2x1_1 +#endif lxsdx vs8, 0, AO @@ -2576,9 +3172,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_2', ` +#else .macro KERNEL2x1_2 +#endif lxsdx vs0, 0, AO @@ -2593,18 +3197,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs40, vs8, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_E2', ` +#else .macro KERNEL2x1_E2 +#endif xsmaddadp vs32, vs8, vs28 xsmaddadp vs40, vs8, vs29 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUBI1', ` +#else .macro KERNEL2x1_SUBI1 +#endif lxsdx vs0, 0, AO @@ -2619,9 +3239,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUB1', ` +#else .macro KERNEL2x1_SUB1 +#endif lxsdx vs0, 0, AO @@ -2636,9 +3264,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs40, vs0, vs25 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x1', ` +#else .macro SAVE2x1 +#endif mr T1, CO @@ -2670,13 +3306,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=1, M=16 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD1x16_1', ` +#else .macro LOAD1x16_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2695,9 +3339,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_I1', ` +#else .macro KERNEL1x16_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2726,9 +3378,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs38, vs6, vs24 xvmuldp vs39, vs7, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_1', ` +#else .macro KERNEL1x16_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2757,9 +3417,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs38, vs6, vs24 xvmaddadp vs39, vs7, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_2', ` +#else .macro KERNEL1x16_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2788,9 +3456,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs38, vs14, vs28 xvmaddadp vs39, vs15, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_E2', ` +#else .macro KERNEL1x16_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -2802,9 +3478,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs38, vs14, vs28 xvmaddadp vs39, vs15, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_SUBI1', ` +#else .macro KERNEL1x16_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2833,9 +3517,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs38, vs6, vs24 xvmuldp vs39, vs7, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_SUB1', ` +#else .macro KERNEL1x16_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2864,9 +3556,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs38, vs6, vs24 xvmaddadp vs39, vs7, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x16', ` +#else .macro SAVE1x16 +#endif mr T1, CO addi T2, T1, 64 @@ -2915,13 +3615,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=4, M=8 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD1x8_1', ` +#else .macro LOAD1x8_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2933,9 +3641,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_I1', ` +#else .macro KERNEL1x8_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2953,9 +3669,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmuldp vs34, vs2, vs24 xvmuldp vs35, vs3, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_1', ` +#else .macro KERNEL1x8_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -2973,9 +3697,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs34, vs2, vs24 xvmaddadp vs35, vs3, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_2', ` +#else .macro KERNEL1x8_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -2993,9 +3725,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs34, vs10, vs28 xvmaddadp vs35, vs11, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_E2', ` +#else .macro KERNEL1x8_E2 +#endif xvmaddadp vs32, vs8, vs28 @@ -3003,9 +3743,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs34, vs10, vs28 xvmaddadp vs35, vs11, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUBI1', ` +#else .macro KERNEL1x8_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3023,9 +3771,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs34, vs2, vs24 xvmuldp vs35, vs3, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUB1', ` +#else .macro KERNEL1x8_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3043,9 +3799,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs34, vs2, vs24 xvmaddadp vs35, vs3, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x8', ` +#else .macro SAVE1x8 +#endif mr T1, CO @@ -3075,13 +3839,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=1, M=4 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD1x4_1', ` +#else .macro LOAD1x4_1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3091,9 +3863,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 32 addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_I1', ` +#else .macro KERNEL1x4_I1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -3107,9 +3887,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs32, vs0, vs24 xvmuldp vs33, vs1, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_1', ` +#else .macro KERNEL1x4_1 +#endif lxvd2x vs8, 0, AO lxvd2x vs9, o16, AO @@ -3123,9 +3911,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs32, vs0, vs24 xvmaddadp vs33, vs1, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_2', ` +#else .macro KERNEL1x4_2 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3139,17 +3935,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs32, vs8, vs28 xvmaddadp vs33, vs9, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_E2', ` +#else .macro KERNEL1x4_E2 +#endif xvmaddadp vs32, vs8, vs28 xvmaddadp vs33, vs9, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUBI1', ` +#else .macro KERNEL1x4_SUBI1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3163,9 +3975,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmuldp vs32, vs0, vs24 xvmuldp vs33, vs1, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUB1', ` +#else .macro KERNEL1x4_SUB1 +#endif lxvd2x vs0, 0, AO lxvd2x vs1, o16, AO @@ -3179,9 +3999,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs32, vs0, vs24 xvmaddadp vs33, vs1, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x4', ` +#else .macro SAVE1x4 +#endif mr T1, CO @@ -3203,13 +4031,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=1, M=2 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD1x2_1', ` +#else .macro LOAD1x2_1 +#endif lxvd2x vs0, 0, AO @@ -3218,9 +4054,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 16 addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_I1', ` +#else .macro KERNEL1x2_I1 +#endif lxvd2x vs8, 0, AO @@ -3232,9 +4076,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_1', ` +#else .macro KERNEL1x2_1 +#endif lxvd2x vs8, 0, AO @@ -3246,9 +4098,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_2', ` +#else .macro KERNEL1x2_2 +#endif lxvd2x vs0, 0, AO @@ -3260,16 +4120,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs32, vs8, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_E2', ` +#else .macro KERNEL1x2_E2 +#endif xvmaddadp vs32, vs8, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUBI1', ` +#else .macro KERNEL1x2_SUBI1 +#endif lxvd2x vs0, 0, AO @@ -3281,9 +4157,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUB1', ` +#else .macro KERNEL1x2_SUB1 +#endif lxvd2x vs0, 0, AO @@ -3295,9 +4179,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x2', ` +#else .macro SAVE1x2 +#endif mr T1, CO @@ -3315,13 +4207,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************* * Macros for N=1, M=1 * *********************************************************************/ +#if defined(_AIX) +define(`LOAD1x1_1', ` +#else .macro LOAD1x1_1 +#endif lxsdx vs0, 0, AO @@ -3330,9 +4230,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 8 addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_I1', ` +#else .macro KERNEL1x1_I1 +#endif lxsdx vs8, 0, AO @@ -3344,9 +4252,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_1', ` +#else .macro KERNEL1x1_1 +#endif lxsdx vs8, 0, AO @@ -3358,9 +4274,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmaddadp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_2', ` +#else .macro KERNEL1x1_2 +#endif lxsdx vs0, 0, AO @@ -3372,16 +4296,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs32, vs8, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_E2', ` +#else .macro KERNEL1x1_E2 +#endif xsmaddadp vs32, vs8, vs28 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUBI1', ` +#else .macro KERNEL1x1_SUBI1 +#endif lxsdx vs0, 0, AO @@ -3393,9 +4333,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUB1', ` +#else .macro KERNEL1x1_SUB1 +#endif lxsdx vs0, 0, AO @@ -3407,9 +4355,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs32, vs0, vs24 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x1', ` +#else .macro SAVE1x1 +#endif mr T1, CO @@ -3427,5 +4383,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/dtrsm_macros_LT_16x4_power8.S b/kernel/power/dtrsm_macros_LT_16x4_power8.S index dc47daa3a..5a5c4037c 100644 --- a/kernel/power/dtrsm_macros_LT_16x4_power8.S +++ b/kernel/power/dtrsm_macros_LT_16x4_power8.S @@ -1,46 +1,58 @@ +#if defined(_AIX) +define(`INIT_16x4', ` +#else .macro INIT_16x4 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 - xvmovdp vs34, vs0 - xvmovdp vs35, vs0 - xvmovdp vs36, vs0 - xvmovdp vs37, vs0 - xvmovdp vs38, vs0 - xvmovdp vs39, vs0 - xvmovdp vs40, vs0 - xvmovdp vs41, vs0 - xvmovdp vs42, vs0 - xvmovdp vs43, vs0 - xvmovdp vs44, vs0 - xvmovdp vs45, vs0 - xvmovdp vs46, vs0 - xvmovdp vs47, vs0 - xvmovdp vs48, vs0 - xvmovdp vs49, vs0 - xvmovdp vs50, vs0 - xvmovdp vs51, vs0 - xvmovdp vs52, vs0 - xvmovdp vs53, vs0 - xvmovdp vs54, vs0 - xvmovdp vs55, vs0 - xvmovdp vs56, vs0 - xvmovdp vs57, vs0 - xvmovdp vs58, vs0 - xvmovdp vs59, vs0 - xvmovdp vs60, vs0 - xvmovdp vs61, vs0 - xvmovdp vs62, vs0 - xvmovdp vs63, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) + XVMOVDP(vs34,vs0) + XVMOVDP(vs35,vs0) + XVMOVDP(vs36,vs0) + XVMOVDP(vs37,vs0) + XVMOVDP(vs38,vs0) + XVMOVDP(vs39,vs0) + XVMOVDP(vs40,vs0) + XVMOVDP(vs41,vs0) + XVMOVDP(vs42,vs0) + XVMOVDP(vs43,vs0) + XVMOVDP(vs44,vs0) + XVMOVDP(vs45,vs0) + XVMOVDP(vs46,vs0) + XVMOVDP(vs47,vs0) + XVMOVDP(vs48,vs0) + XVMOVDP(vs49,vs0) + XVMOVDP(vs50,vs0) + XVMOVDP(vs51,vs0) + XVMOVDP(vs52,vs0) + XVMOVDP(vs53,vs0) + XVMOVDP(vs54,vs0) + XVMOVDP(vs55,vs0) + XVMOVDP(vs56,vs0) + XVMOVDP(vs57,vs0) + XVMOVDP(vs58,vs0) + XVMOVDP(vs59,vs0) + XVMOVDP(vs60,vs0) + XVMOVDP(vs61,vs0) + XVMOVDP(vs62,vs0) + XVMOVDP(vs63,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_16x4', ` +#else .macro KERNEL_16x4 +#endif lxvd2x vs0, o0, AO @@ -98,35 +110,51 @@ xvmaddadp vs63, vs7, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_8x4', ` +#else .macro INIT_8x4 +#endif xxlxor 
vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 - xvmovdp vs34, vs0 - xvmovdp vs35, vs0 - xvmovdp vs36, vs0 - xvmovdp vs37, vs0 - xvmovdp vs38, vs0 - xvmovdp vs39, vs0 - xvmovdp vs40, vs0 - xvmovdp vs41, vs0 - xvmovdp vs42, vs0 - xvmovdp vs43, vs0 - xvmovdp vs44, vs0 - xvmovdp vs45, vs0 - xvmovdp vs46, vs0 - xvmovdp vs47, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) + XVMOVDP(vs34,vs0) + XVMOVDP(vs35,vs0) + XVMOVDP(vs36,vs0) + XVMOVDP(vs37,vs0) + XVMOVDP(vs38,vs0) + XVMOVDP(vs39,vs0) + XVMOVDP(vs40,vs0) + XVMOVDP(vs41,vs0) + XVMOVDP(vs42,vs0) + XVMOVDP(vs43,vs0) + XVMOVDP(vs44,vs0) + XVMOVDP(vs45,vs0) + XVMOVDP(vs46,vs0) + XVMOVDP(vs47,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_8x4', ` +#else .macro KERNEL_8x4 +#endif lxvd2x vs0, o0, AO @@ -161,27 +189,43 @@ xvmaddadp vs47, vs3, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_4x4', ` +#else .macro INIT_4x4 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 - xvmovdp vs34, vs0 - xvmovdp vs35, vs0 - xvmovdp vs36, vs0 - xvmovdp vs37, vs0 - xvmovdp vs38, vs0 - xvmovdp vs39, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) + XVMOVDP(vs34,vs0) + XVMOVDP(vs35,vs0) + XVMOVDP(vs36,vs0) + XVMOVDP(vs37,vs0) + XVMOVDP(vs38,vs0) + XVMOVDP(vs39,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_4x4', ` +#else .macro KERNEL_4x4 +#endif lxvd2x vs0, o0, AO @@ -206,23 +250,39 @@ xvmaddadp vs39, vs1, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_2x4', ` +#else .macro INIT_2x4 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 - xvmovdp vs34, vs0 - xvmovdp vs35, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) + XVMOVDP(vs34,vs0) + XVMOVDP(vs35,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_2x4', ` +#else .macro KERNEL_2x4 +#endif lxvd2x vs0, o0, AO @@ -242,23 +302,39 @@ xvmaddadp vs35, vs0, vs19 +#if defined(_AIX) +') +#else 
.endm +#endif +#if defined(_AIX) +define(`INIT_1x4', ` +#else .macro INIT_1x4 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 - xvmovdp vs34, vs0 - xvmovdp vs35, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) + XVMOVDP(vs34,vs0) + XVMOVDP(vs35,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_1x4', ` +#else .macro KERNEL_1x4 +#endif lxvdsx vs0, o0, AO @@ -278,14 +354,22 @@ xvmaddadp vs35, vs0, vs19 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 16x4 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_16x4', ` +#else .macro SOLVE_LT_16x4 +#endif //############### LOAD B ####################### @@ -1149,46 +1233,46 @@ stxsdx vs32, o0, T1 - xxswapd vs32, vs32 + XXSWAPD(vs32,vs32) stxsdx vs34, o8, T1 - xxswapd vs34, vs34 + XXSWAPD(vs34,vs34) stxsdx vs36, o16, T1 - xxswapd vs36, vs36 + XXSWAPD(vs36,vs36) stxsdx vs38, o24, T1 - xxswapd vs38, vs38 + XXSWAPD(vs38,vs38) addi T1, T1, 32 stxsdx vs40, o0, T1 - xxswapd vs40, vs40 + XXSWAPD(vs40,vs40) stxsdx vs42, o8, T1 - xxswapd vs42, vs42 + XXSWAPD(vs42,vs42) stxsdx vs44, o16, T1 - xxswapd vs44, vs44 + XXSWAPD(vs44,vs44) stxsdx vs46, o24, T1 - xxswapd vs46, vs46 + XXSWAPD(vs46,vs46) addi T1, T1, 32 stxsdx vs48, o0, T1 - xxswapd vs48, vs48 + XXSWAPD(vs48,vs48) stxsdx vs50, o8, T1 - xxswapd vs50, vs50 + XXSWAPD(vs50,vs50) stxsdx vs52, o16, T1 - xxswapd vs52, vs52 + XXSWAPD(vs52,vs52) stxsdx vs54, o24, T1 - xxswapd vs54, vs54 + XXSWAPD(vs54,vs54) addi T1, T1, 32 stxsdx vs56, o0, T1 - xxswapd vs56, vs56 + XXSWAPD(vs56,vs56) stxsdx vs58, o8, T1 - xxswapd vs58, vs58 + XXSWAPD(vs58,vs58) stxsdx vs60, o16, T1 - xxswapd vs60, vs60 + XXSWAPD(vs60,vs60) stxsdx vs62, o24, T1 - xxswapd vs62, vs62 + XXSWAPD(vs62,vs62) stxsdx vs32, o0, T2 stxsdx vs34, o8, T2 @@ -1225,46 +1309,46 @@ stxsdx vs33, o0, T1 - xxswapd 
vs33, vs33 + XXSWAPD(vs33,vs33) stxsdx vs35, o8, T1 - xxswapd vs35, vs35 + XXSWAPD(vs35,vs35) stxsdx vs37, o16, T1 - xxswapd vs37, vs37 + XXSWAPD(vs37,vs37) stxsdx vs39, o24, T1 - xxswapd vs39, vs39 + XXSWAPD(vs39,vs39) addi T1, T1, 32 stxsdx vs41, o0, T1 - xxswapd vs41, vs41 + XXSWAPD(vs41,vs41) stxsdx vs43, o8, T1 - xxswapd vs43, vs43 + XXSWAPD(vs43,vs43) stxsdx vs45, o16, T1 - xxswapd vs45, vs45 + XXSWAPD(vs45,vs45) stxsdx vs47, o24, T1 - xxswapd vs47, vs47 + XXSWAPD(vs47,vs47) addi T1, T1, 32 stxsdx vs49, o0, T1 - xxswapd vs49, vs49 + XXSWAPD(vs49,vs49) stxsdx vs51, o8, T1 - xxswapd vs51, vs51 + XXSWAPD(vs51,vs51) stxsdx vs53, o16, T1 - xxswapd vs53, vs53 + XXSWAPD(vs53,vs53) stxsdx vs55, o24, T1 - xxswapd vs55, vs55 + XXSWAPD(vs55,vs55) addi T1, T1, 32 stxsdx vs57, o0, T1 - xxswapd vs57, vs57 + XXSWAPD(vs57,vs57) stxsdx vs59, o8, T1 - xxswapd vs59, vs59 + XXSWAPD(vs59,vs59) stxsdx vs61, o16, T1 - xxswapd vs61, vs61 + XXSWAPD(vs61,vs61) stxsdx vs63, o24, T1 - xxswapd vs63, vs63 + XXSWAPD(vs63,vs63) stxsdx vs33, o0, T2 stxsdx vs35, o8, T2 @@ -1292,14 +1376,22 @@ stxsdx vs61, o16, T2 stxsdx vs63, o24, T2 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 8x4 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_8x4', ` +#else .macro SOLVE_LT_8x4 +#endif xxpermdi vs0, vs32, vs33, 0 xxpermdi vs1, vs34, vs35, 0 @@ -1603,24 +1695,24 @@ stxsdx vs32, o0, T1 - xxswapd vs32, vs32 + XXSWAPD(vs32,vs32) stxsdx vs34, o8, T1 - xxswapd vs34, vs34 + XXSWAPD(vs34,vs34) stxsdx vs36, o16, T1 - xxswapd vs36, vs36 + XXSWAPD(vs36,vs36) stxsdx vs38, o24, T1 - xxswapd vs38, vs38 + XXSWAPD(vs38,vs38) addi T1, T1, 32 stxsdx vs40, o0, T1 - xxswapd vs40, vs40 + XXSWAPD(vs40,vs40) stxsdx vs42, o8, T1 - xxswapd vs42, vs42 + XXSWAPD(vs42,vs42) stxsdx vs44, o16, T1 - xxswapd vs44, vs44 + XXSWAPD(vs44,vs44) stxsdx vs46, o24, 
T1 - xxswapd vs46, vs46 + XXSWAPD(vs46,vs46) stxsdx vs32, o0, T2 stxsdx vs34, o8, T2 @@ -1643,24 +1735,24 @@ stxsdx vs33, o0, T1 - xxswapd vs33, vs33 + XXSWAPD(vs33,vs33) stxsdx vs35, o8, T1 - xxswapd vs35, vs35 + XXSWAPD(vs35,vs35) stxsdx vs37, o16, T1 - xxswapd vs37, vs37 + XXSWAPD(vs37,vs37) stxsdx vs39, o24, T1 - xxswapd vs39, vs39 + XXSWAPD(vs39,vs39) addi T1, T1, 32 stxsdx vs41, o0, T1 - xxswapd vs41, vs41 + XXSWAPD(vs41,vs41) stxsdx vs43, o8, T1 - xxswapd vs43, vs43 + XXSWAPD(vs43,vs43) stxsdx vs45, o16, T1 - xxswapd vs45, vs45 + XXSWAPD(vs45,vs45) stxsdx vs47, o24, T1 - xxswapd vs47, vs47 + XXSWAPD(vs47,vs47) stxsdx vs33, o0, T2 stxsdx vs35, o8, T2 @@ -1674,14 +1766,22 @@ stxsdx vs45, o16, T2 stxsdx vs47, o24, T2 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 4x4 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_4x4', ` +#else .macro SOLVE_LT_4x4 +#endif xxpermdi vs0, vs32, vs33, 0 xxpermdi vs1, vs34, vs35, 0 @@ -1813,13 +1913,13 @@ stxsdx vs32, o0, T1 - xxswapd vs32, vs32 + XXSWAPD(vs32,vs32) stxsdx vs34, o8, T1 - xxswapd vs34, vs34 + XXSWAPD(vs34,vs34) stxsdx vs36, o16, T1 - xxswapd vs36, vs36 + XXSWAPD(vs36,vs36) stxsdx vs38, o24, T1 - xxswapd vs38, vs38 + XXSWAPD(vs38,vs38) stxsdx vs32, o0, T2 stxsdx vs34, o8, T2 @@ -1835,27 +1935,35 @@ stxsdx vs33, o0, T1 - xxswapd vs33, vs33 + XXSWAPD(vs33,vs33) stxsdx vs35, o8, T1 - xxswapd vs35, vs35 + XXSWAPD(vs35,vs35) stxsdx vs37, o16, T1 - xxswapd vs37, vs37 + XXSWAPD(vs37,vs37) stxsdx vs39, o24, T1 - xxswapd vs39, vs39 + XXSWAPD(vs39,vs39) stxsdx vs33, o0, T2 stxsdx vs35, o8, T2 stxsdx vs37, o16, T2 stxsdx vs39, o24, T2 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 2x4 
##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_2x4', ` +#else .macro SOLVE_LT_2x4 +#endif xxpermdi vs0, vs32, vs33, 0 xxpermdi vs1, vs34, vs35, 0 @@ -1925,9 +2033,9 @@ stxsdx vs32, o0, T1 - xxswapd vs32, vs32 + XXSWAPD(vs32,vs32) stxsdx vs34, o8, T1 - xxswapd vs34, vs34 + XXSWAPD(vs34,vs34) stxsdx vs32, o0, T2 stxsdx vs34, o8, T2 @@ -1941,21 +2049,29 @@ stxsdx vs33, o0, T1 - xxswapd vs33, vs33 + XXSWAPD(vs33,vs33) stxsdx vs35, o8, T1 - xxswapd vs35, vs35 + XXSWAPD(vs35,vs35) stxsdx vs33, o0, T2 stxsdx vs35, o8, T2 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 1x4 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_1x4', ` +#else .macro SOLVE_LT_1x4 +#endif xxpermdi vs0, vs32, vs33, 0 xxpermdi vs1, vs34, vs35, 0 @@ -2001,7 +2117,7 @@ stxsdx vs32, o0, T1 - xxswapd vs32, vs32 + XXSWAPD(vs32,vs32) stxsdx vs32, o0, T2 @@ -2014,39 +2130,55 @@ stxsdx vs33, o0, T1 - xxswapd vs33, vs33 + XXSWAPD(vs33,vs33) stxsdx vs33, o0, T2 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_16x2', ` +#else .macro INIT_16x2 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 - xvmovdp vs34, vs0 - xvmovdp vs35, vs0 - xvmovdp vs36, vs0 - xvmovdp vs37, vs0 - xvmovdp vs38, vs0 - xvmovdp vs39, vs0 - xvmovdp vs40, vs0 - xvmovdp vs41, vs0 - xvmovdp vs42, vs0 - xvmovdp vs43, vs0 - xvmovdp vs44, vs0 - xvmovdp vs45, vs0 - xvmovdp vs46, vs0 - xvmovdp vs47, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) + XVMOVDP(vs34,vs0) + XVMOVDP(vs35,vs0) + XVMOVDP(vs36,vs0) + XVMOVDP(vs37,vs0) + XVMOVDP(vs38,vs0) + XVMOVDP(vs39,vs0) + XVMOVDP(vs40,vs0) + XVMOVDP(vs41,vs0) + XVMOVDP(vs42,vs0) + XVMOVDP(vs43,vs0) + XVMOVDP(vs44,vs0) + XVMOVDP(vs45,vs0) + XVMOVDP(vs46,vs0) + XVMOVDP(vs47,vs0) +#if defined(_AIX) +') 
+#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_16x2', ` +#else .macro KERNEL_16x2 +#endif lxvd2x vs0, o0, AO @@ -2086,27 +2218,43 @@ xvmaddadp vs47, vs7, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_8x2', ` +#else .macro INIT_8x2 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 - xvmovdp vs34, vs0 - xvmovdp vs35, vs0 - xvmovdp vs36, vs0 - xvmovdp vs37, vs0 - xvmovdp vs38, vs0 - xvmovdp vs39, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) + XVMOVDP(vs34,vs0) + XVMOVDP(vs35,vs0) + XVMOVDP(vs36,vs0) + XVMOVDP(vs37,vs0) + XVMOVDP(vs38,vs0) + XVMOVDP(vs39,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_8x2', ` +#else .macro KERNEL_8x2 +#endif lxvd2x vs0, o0, AO @@ -2131,23 +2279,39 @@ xvmaddadp vs39, vs3, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_4x2', ` +#else .macro INIT_4x2 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 - xvmovdp vs34, vs0 - xvmovdp vs35, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) + XVMOVDP(vs34,vs0) + XVMOVDP(vs35,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_4x2', ` +#else .macro KERNEL_4x2 +#endif lxvd2x vs0, o0, AO @@ -2166,21 +2330,37 @@ xvmaddadp vs35, vs1, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_2x2', ` +#else .macro INIT_2x2 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_2x2', ` +#else .macro KERNEL_2x2 +#endif lxvd2x vs0, o0, AO @@ -2196,21 +2376,37 @@ xvmaddadp vs33, vs0, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_1x2', ` +#else .macro INIT_1x2 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) 
+define(`KERNEL_1x2', ` +#else .macro KERNEL_1x2 +#endif lxvdsx vs0, o0, AO @@ -2226,14 +2422,22 @@ xvmaddadp vs33, vs0, vs17 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 16x2 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_16x2', ` +#else .macro SOLVE_LT_16x2 +#endif xxpermdi vs0, vs32, vs33, 0 xxpermdi vs1, vs32, vs33, 3 @@ -2821,46 +3025,46 @@ stxsdx vs32, o0, T1 - xxswapd vs32, vs32 + XXSWAPD(vs32,vs32) stxsdx vs33, o8, T1 - xxswapd vs33, vs33 + XXSWAPD(vs33,vs33) stxsdx vs34, o16, T1 - xxswapd vs34, vs34 + XXSWAPD(vs34,vs34) stxsdx vs35, o24, T1 - xxswapd vs35, vs35 + XXSWAPD(vs35,vs35) addi T1, T1, 32 stxsdx vs36, o0, T1 - xxswapd vs36, vs36 + XXSWAPD(vs36,vs36) stxsdx vs37, o8, T1 - xxswapd vs37, vs37 + XXSWAPD(vs37,vs37) stxsdx vs38, o16, T1 - xxswapd vs38, vs38 + XXSWAPD(vs38,vs38) stxsdx vs39, o24, T1 - xxswapd vs39, vs39 + XXSWAPD(vs39,vs39) addi T1, T1, 32 stxsdx vs40, o0, T1 - xxswapd vs40, vs40 + XXSWAPD(vs40,vs40) stxsdx vs41, o8, T1 - xxswapd vs41, vs41 + XXSWAPD(vs41,vs41) stxsdx vs42, o16, T1 - xxswapd vs42, vs42 + XXSWAPD(vs42,vs42) stxsdx vs43, o24, T1 - xxswapd vs43, vs43 + XXSWAPD(vs43,vs43) addi T1, T1, 32 stxsdx vs44, o0, T1 - xxswapd vs44, vs44 + XXSWAPD(vs44,vs44) stxsdx vs45, o8, T1 - xxswapd vs45, vs45 + XXSWAPD(vs45,vs45) stxsdx vs46, o16, T1 - xxswapd vs46, vs46 + XXSWAPD(vs46,vs46) stxsdx vs47, o24, T1 - xxswapd vs47, vs47 + XXSWAPD(vs47,vs47) stxsdx vs32, o0, T2 stxsdx vs33, o8, T2 @@ -2888,14 +3092,22 @@ stxsdx vs46, o16, T2 stxsdx vs47, o24, T2 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 8x2 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_8x2', ` +#else .macro 
SOLVE_LT_8x2 +#endif xxpermdi vs0, vs32, vs33, 0 xxpermdi vs1, vs32, vs33, 3 @@ -3111,24 +3323,24 @@ stxsdx vs32, o0, T1 - xxswapd vs32, vs32 + XXSWAPD(vs32,vs32) stxsdx vs33, o8, T1 - xxswapd vs33, vs33 + XXSWAPD(vs33,vs33) stxsdx vs34, o16, T1 - xxswapd vs34, vs34 + XXSWAPD(vs34,vs34) stxsdx vs35, o24, T1 - xxswapd vs35, vs35 + XXSWAPD(vs35,vs35) addi T1, T1, 32 stxsdx vs36, o0, T1 - xxswapd vs36, vs36 + XXSWAPD(vs36,vs36) stxsdx vs37, o8, T1 - xxswapd vs37, vs37 + XXSWAPD(vs37,vs37) stxsdx vs38, o16, T1 - xxswapd vs38, vs38 + XXSWAPD(vs38,vs38) stxsdx vs39, o24, T1 - xxswapd vs39, vs39 + XXSWAPD(vs39,vs39) stxsdx vs32, o0, T2 stxsdx vs33, o8, T2 @@ -3142,14 +3354,22 @@ stxsdx vs38, o16, T2 stxsdx vs39, o24, T2 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 4x2 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_4x2', ` +#else .macro SOLVE_LT_4x2 +#endif xxpermdi vs0, vs32, vs33, 0 xxpermdi vs1, vs32, vs33, 3 @@ -3245,27 +3465,35 @@ stxsdx vs32, o0, T1 - xxswapd vs32, vs32 + XXSWAPD(vs32,vs32) stxsdx vs33, o8, T1 - xxswapd vs33, vs33 + XXSWAPD(vs33,vs33) stxsdx vs34, o16, T1 - xxswapd vs34, vs34 + XXSWAPD(vs34,vs34) stxsdx vs35, o24, T1 - xxswapd vs35, vs35 + XXSWAPD(vs35,vs35) stxsdx vs32, o0, T2 stxsdx vs33, o8, T2 stxsdx vs34, o16, T2 stxsdx vs35, o24, T2 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 2x2 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_2x2', ` +#else .macro SOLVE_LT_2x2 +#endif xxpermdi vs0, vs32, vs33, 0 xxpermdi vs1, vs32, vs33, 3 @@ -3322,21 +3550,29 @@ stxsdx vs32, o0, T1 - xxswapd vs32, vs32 + XXSWAPD(vs32,vs32) stxsdx vs33, o8, T1 - xxswapd vs33, vs33 + XXSWAPD(vs33,vs33) stxsdx vs32, 
o0, T2 stxsdx vs33, o8, T2 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 1x2 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_1x2', ` +#else .macro SOLVE_LT_1x2 +#endif xxpermdi vs0, vs32, vs33, 0 @@ -3376,39 +3612,55 @@ stxsdx vs32, o0, T1 - xxswapd vs32, vs32 + XXSWAPD(vs32,vs32) stxsdx vs32, o0, T2 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_16x1', ` +#else .macro INIT_16x1 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 - xvmovdp vs34, vs0 - xvmovdp vs35, vs0 - xvmovdp vs36, vs0 - xvmovdp vs37, vs0 - xvmovdp vs38, vs0 - xvmovdp vs39, vs0 - xvmovdp vs40, vs0 - xvmovdp vs41, vs0 - xvmovdp vs42, vs0 - xvmovdp vs43, vs0 - xvmovdp vs44, vs0 - xvmovdp vs45, vs0 - xvmovdp vs46, vs0 - xvmovdp vs47, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) + XVMOVDP(vs34,vs0) + XVMOVDP(vs35,vs0) + XVMOVDP(vs36,vs0) + XVMOVDP(vs37,vs0) + XVMOVDP(vs38,vs0) + XVMOVDP(vs39,vs0) + XVMOVDP(vs40,vs0) + XVMOVDP(vs41,vs0) + XVMOVDP(vs42,vs0) + XVMOVDP(vs43,vs0) + XVMOVDP(vs44,vs0) + XVMOVDP(vs45,vs0) + XVMOVDP(vs46,vs0) + XVMOVDP(vs47,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_16x1', ` +#else .macro KERNEL_16x1 +#endif lxvdsx vs0, o0, AO @@ -3461,27 +3713,43 @@ xvmaddadp vs47, vs15, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_8x1', ` +#else .macro INIT_8x1 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 - xvmovdp vs34, vs0 - xvmovdp vs35, vs0 - xvmovdp vs36, vs0 - xvmovdp vs37, vs0 - xvmovdp vs38, vs0 - xvmovdp vs39, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) + XVMOVDP(vs34,vs0) + XVMOVDP(vs35,vs0) + XVMOVDP(vs36,vs0) + XVMOVDP(vs37,vs0) + XVMOVDP(vs38,vs0) + XVMOVDP(vs39,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_8x1', ` 
+#else .macro KERNEL_8x1 +#endif lxvdsx vs0, o0, AO @@ -3512,23 +3780,39 @@ xvmaddadp vs39, vs7, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_4x1', ` +#else .macro INIT_4x1 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 - xvmovdp vs34, vs0 - xvmovdp vs35, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) + XVMOVDP(vs34,vs0) + XVMOVDP(vs35,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_4x1', ` +#else .macro KERNEL_4x1 +#endif lxvdsx vs0, o0, AO @@ -3548,21 +3832,37 @@ xvmaddadp vs35, vs3, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_2x1', ` +#else .macro INIT_2x1 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 - xvmovdp vs33, vs0 + XVMOVDP(vs32,vs0) + XVMOVDP(vs33,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_2x1', ` +#else .macro KERNEL_2x1 +#endif lxvdsx vs0, o0, AO @@ -3578,20 +3878,36 @@ xvmaddadp vs33, vs1, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`INIT_1x1', ` +#else .macro INIT_1x1 +#endif xxlxor vs0, vs0, vs0 - xvmovdp vs32, vs0 + XVMOVDP(vs32,vs0) +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL_1x1', ` +#else .macro KERNEL_1x1 +#endif lxvdsx vs0, o0, AO @@ -3605,31 +3921,39 @@ xvmaddadp vs32, vs0, vs16 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 16x1 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_16x1', ` +#else .macro SOLVE_LT_16x1 +#endif - xxswapd vs0, vs32 - xxswapd vs1, vs33 - xxswapd vs2, vs34 - xxswapd vs3, vs35 - xxswapd vs4, vs36 - xxswapd vs5, vs37 - xxswapd vs6, vs38 - xxswapd vs7, vs39 - xxswapd vs8, vs40 - xxswapd vs9, vs41 - xxswapd vs10, vs42 - xxswapd vs11, vs43 - xxswapd vs12, vs44 - xxswapd vs13, vs45 - xxswapd vs14, vs46 - 
xxswapd vs15, vs47 + XXSWAPD(vs0,vs32) + XXSWAPD(vs1,vs33) + XXSWAPD(vs2,vs34) + XXSWAPD(vs3,vs35) + XXSWAPD(vs4,vs36) + XXSWAPD(vs5,vs37) + XXSWAPD(vs6,vs38) + XXSWAPD(vs7,vs39) + XXSWAPD(vs8,vs40) + XXSWAPD(vs9,vs41) + XXSWAPD(vs10,vs42) + XXSWAPD(vs11,vs43) + XXSWAPD(vs12,vs44) + XXSWAPD(vs13,vs45) + XXSWAPD(vs14,vs46) + XXSWAPD(vs15,vs47) //############### LOAD B ####################### @@ -4215,23 +4539,31 @@ stxsdx vs46, o16, T1 stxsdx vs47, o24, T1 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 8x1 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_8x1', ` +#else .macro SOLVE_LT_8x1 +#endif - xxswapd vs0, vs32 - xxswapd vs1, vs33 - xxswapd vs2, vs34 - xxswapd vs3, vs35 - xxswapd vs4, vs36 - xxswapd vs5, vs37 - xxswapd vs6, vs38 - xxswapd vs7, vs39 + XXSWAPD(vs0,vs32) + XXSWAPD(vs1,vs33) + XXSWAPD(vs2,vs34) + XXSWAPD(vs3,vs35) + XXSWAPD(vs4,vs36) + XXSWAPD(vs5,vs37) + XXSWAPD(vs6,vs38) + XXSWAPD(vs7,vs39) //############### LOAD B ####################### @@ -4443,19 +4775,27 @@ stxsdx vs38, o16, T1 stxsdx vs39, o24, T1 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 4x1 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_4x1', ` +#else .macro SOLVE_LT_4x1 +#endif - xxswapd vs0, vs32 - xxswapd vs1, vs33 - xxswapd vs2, vs34 - xxswapd vs3, vs35 + XXSWAPD(vs0,vs32) + XXSWAPD(vs1,vs33) + XXSWAPD(vs2,vs34) + XXSWAPD(vs3,vs35) //############### LOAD B ####################### @@ -4546,17 +4886,25 @@ stxsdx vs34, o16, T1 stxsdx vs35, o24, T1 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 2x1 
##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_2x1', ` +#else .macro SOLVE_LT_2x1 +#endif - xxswapd vs0, vs32 - xxswapd vs1, vs33 + XXSWAPD(vs0,vs32) + XXSWAPD(vs1,vs33) //############### LOAD B ####################### @@ -4609,16 +4957,24 @@ stxsdx vs32, o0, T1 stxsdx vs33, o8, T1 +#if defined(_AIX) +') +#else .endm +#endif /*########################################################################################## SOLVE_LT 1x1 ##########################################################################################*/ +#if defined(_AIX) +define(`SOLVE_LT_1x1', ` +#else .macro SOLVE_LT_1x1 +#endif - xxswapd vs0, vs32 + XXSWAPD(vs0,vs32) //############### LOAD B ####################### @@ -4655,5 +5011,9 @@ stxsdx vs32, o0, T1 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/idamax.c b/kernel/power/idamax.c index 5bdc0a13c..623ac9fb0 100644 --- a/kernel/power/idamax.c +++ b/kernel/power/idamax.c @@ -58,8 +58,8 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { "lxvd2x 47, %[i48],%[ptr_tmp] \n\t" "lxvd2x 48, %[i64],%[ptr_tmp] \n\t" "lxvd2x 49, %[i80],%[ptr_tmp] \n\t" - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" "xxlor 40,%x[start],%x[start] \n\t" //{ 1,0} vs40 | v8 "vaddudm 9,8,%[adder] \n\t" //{3,2} vs41 @@ -69,7 +69,7 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { "vaddudm 11,10,%[adder] \n\t" //{7,6} vs43 "xxlxor 39,39,39 \n\t" // vs39 vec_max_value "vaddudm 4,11, %[adder] \n\t" // {9,8} -{8;8} vs36 | v4 - "xxspltd 36,36,0 \n\t" + XXSPLTD_S(36,36,0) "xvabsdp 44, 44 \n\t" "xvabsdp 45, 45 \n\t" @@ -77,21 +77,21 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { "xvabsdp 47, 47 \n\t" "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 \n\t" + "xvabsdp 6, 6 
\n\t" + "xvabsdp 7, 7 \n\t" //jump first half forward - "b 2f \n\t" + "b two%= \n\t" //=================================================================== - ".p2align 5 \n\t" + ".align 5 \n\t" - "1: \n\t" + "one%=: \n\t" "xvcmpgtdp 2,45,44 \n\t " "xvcmpgtdp 3,47,46 \n\t " "xvcmpgtdp 4,49,48 \n\t " - "xvcmpgtdp 5,51,50 \n\t" + "xvcmpgtdp 5,7,6 \n\t" "xxsel 32,40,41,2 \n\t" "xxsel 0,44,45,2 \n\t" @@ -100,7 +100,7 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { "xxsel 34,40,41,4 \n\t" "xxsel 45,48,49,4 \n\t" "xxsel 35,42,43,5 \n\t" - "xxsel 47,50,51,5 \n\t" + "xxsel 47,6,7,5 \n\t" "xvcmpgtdp 2, 1,0 \n\t" "xvcmpgtdp 3,47, 45 \n\t" @@ -134,8 +134,8 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { "vaddudm 1,1,5 \n\t" // get real index for first bigger - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" //compare with previous to get vec_max_index(v6 | vs38 ) and vec_max_value (vs39) "xvcmpgtdp 2, 3,39 \n\t" @@ -155,16 +155,16 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 \n\t" + "xvabsdp 6, 6 \n\t" + "xvabsdp 7, 7 \n\t" //<-----------jump here from first load - "2: \n\t" + "two%=: \n\t" "xvcmpgtdp 2,45,44 \n\t " "xvcmpgtdp 3,47,46 \n\t " "xvcmpgtdp 4,49,48 \n\t " - "xvcmpgtdp 5,51,50 \n\t" + "xvcmpgtdp 5,7,6 \n\t" "xxsel 32,40,41,2 \n\t" "xxsel 0,44,45,2 \n\t" @@ -173,7 +173,7 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { "xxsel 34,40,41,4 \n\t" "xxsel 45,48,49,4 \n\t" "xxsel 35,42,43,5 \n\t" - "xxsel 47,50,51,5 \n\t" + "xxsel 47,6,7,5 \n\t" "xvcmpgtdp 2, 1,0 \n\t" "xvcmpgtdp 3,47, 45 \n\t" @@ -203,8 +203,8 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { "vaddudm 1,1,5 \n\t" // get real index for first bigger - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 
51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" @@ -226,21 +226,21 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 \n\t" + "xvabsdp 6, 6 \n\t" + "xvabsdp 7, 7 \n\t" //decrement n "addic. %[n], %[n], -32 \n\t" //Loop back if >0 - "bgt+ 1b \n\t" + "bgt+ one%= \n\t" //============================================================================== "xvcmpgtdp 2,45,44 \n\t " "xvcmpgtdp 3,47,46 \n\t " "xvcmpgtdp 4,49,48 \n\t " - "xvcmpgtdp 5,51,50 \n\t" + "xvcmpgtdp 5,7,6 \n\t" "xxsel 32,40,41,2 \n\t" "xxsel 0,44,45,2 \n\t" @@ -249,7 +249,7 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { "xxsel 34,40,41,4 \n\t" "xxsel 45,48,49,4 \n\t" "xxsel 35,42,43,5 \n\t" - "xxsel 47,50,51,5 \n\t" + "xxsel 47,6,7,5 \n\t" "xvcmpgtdp 2, 1,0 \n\t" "xvcmpgtdp 3,47, 45 \n\t" @@ -276,28 +276,28 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { ///////extract max value and max index from vector - "xxspltd 32,38,1 \n\t" - "xxspltd 40,39,1 \n\t" + XXSPLTD_S(32,38,1) + XXSPLTD_S(40,39,1) "xvcmpeqdp. 
2, 40,39 \n\t" //cr6 0 bit set if all true, cr6=4*6+bit_ind=24,0011at CR(BI)==1, at=10 hint that it occurs rarely //0b001110=14 - "bc 14,24, 3f \n\t" + "bc 14,24, three%= \n\t" "xvcmpgtdp 4, 40,39 \n\t" "xxsel 0,39,40,4 \n\t" "xxsel 1,38,32,4 \n\t" "stxsdx 0,0,%[ptr_maxf] \n\t" - "b 4f \n\t" + "b four%= \n\t" - "3: \n\t" + "three%=: \n\t" //if elements value are equal then choose minimum index - "xxspltd 0,40,0 \n\t" + XXSPLTD_S(0,40,0) "vminud 0,0,6 \n\t" //vs32 vs38 "xxlor 1,32,32 \n\t" "stxsdx 0,0,%[ptr_maxf] \n\t" - "4: \n\t" + "four%=: \n\t" "mfvsrd %[index],1 \n\t" : [maxf] "=m"(*maxf),[ptr_tmp] "+&b"(x),[index] "=r"(index), [n] "+&r"(n) @@ -306,7 +306,7 @@ static BLASLONG diamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) { [i64] "b"(64), [i80] "b"(80), [i96] "b"(96), [i112] "b"(112), [start] "v"(start), [adder] "v"(temp_add_index) : "cc", "vs0", "vs1","vs2","vs3", "vs4","vs5","vs32", "vs33", "vs34", "vs35", "vs36", - "vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs50", "vs51" + "vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs6", "vs7" ); diff --git a/kernel/power/idamin.c b/kernel/power/idamin.c index 7fe0f8a33..b2705f2fa 100644 --- a/kernel/power/idamin.c +++ b/kernel/power/idamin.c @@ -58,8 +58,8 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { "lxvd2x 47, %[i48],%[ptr_tmp] \n\t" "lxvd2x 48, %[i64],%[ptr_tmp] \n\t" "lxvd2x 49, %[i80],%[ptr_tmp] \n\t" - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" "xxlor 40,%x[start],%x[start] \n\t" //{ 1,0} vs40 | v8 "vaddudm 9,8, %[adder] \n\t" //{3,2} vs41 @@ -69,7 +69,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { "vaddudm 11,10,%[adder] \n\t" //{7,6} vs43 "lxvdsx 39,0,%[ptr_minf] \n\t" // vs39 vec_min_value "vaddudm 4,11, %[adder] \n\t" // {9,8} 
-{8;8} vs36 | v4 - "xxspltd 36,36,0 \n\t" + XXSPLTD_S(36,36,0) "xvabsdp 39, 39 \n\t" "xvabsdp 44, 44 \n\t" @@ -78,21 +78,21 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { "xvabsdp 47, 47 \n\t" "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 \n\t" + "xvabsdp 6, 6 \n\t" + "xvabsdp 7, 7 \n\t" //jump first half forward - "b 2f \n\t" + "b two%= \n\t" //=================================================================== - ".p2align 5 \n\t" + ".align 5 \n\t" - "1: \n\t" + "one%=: \n\t" "xvcmpgtdp 2,44,45 \n\t " "xvcmpgtdp 3,46,47 \n\t " "xvcmpgtdp 4,48,49 \n\t " - "xvcmpgtdp 5,50,51 \n\t" + "xvcmpgtdp 5,6,7 \n\t" "xxsel 32,40,41,2 \n\t" "xxsel 0,44,45,2 \n\t" @@ -101,7 +101,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { "xxsel 34,40,41,4 \n\t" "xxsel 45,48,49,4 \n\t" "xxsel 35,42,43,5 \n\t" - "xxsel 47,50,51,5 \n\t" + "xxsel 47,6,7,5 \n\t" "xvcmpgtdp 2,0, 1 \n\t" "xvcmpgtdp 3, 45,47 \n\t" @@ -135,8 +135,8 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { "vaddudm 1,1,5 \n\t" // get real index for first smaller - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" //compare with previous to get vec_min_index(v6 | vs38 ) and vec_min_value (vs39) "xvcmpgtdp 2,39, 3 \n\t" @@ -156,16 +156,16 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 \n\t" + "xvabsdp 6, 6 \n\t" + "xvabsdp 7, 7 \n\t" //<-----------jump here from first load - "2: \n\t" + "two%=: \n\t" "xvcmpgtdp 2,44,45 \n\t " "xvcmpgtdp 3,46,47 \n\t " "xvcmpgtdp 4,48,49 \n\t " - "xvcmpgtdp 5,50,51 \n\t" + "xvcmpgtdp 5,6,7 \n\t" "xxsel 32,40,41,2 \n\t" "xxsel 0,44,45,2 \n\t" @@ -174,7 +174,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { "xxsel 34,40,41,4 \n\t" "xxsel 45,48,49,4 
\n\t" "xxsel 35,42,43,5 \n\t" - "xxsel 47,50,51,5 \n\t" + "xxsel 47,6,7,5 \n\t" "xvcmpgtdp 2,0, 1 \n\t" "xvcmpgtdp 3, 45,47 \n\t" @@ -204,8 +204,8 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { "vaddudm 1,1,5 \n\t" // get real index for first smaller - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" @@ -227,21 +227,21 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 \n\t" + "xvabsdp 6, 6 \n\t" + "xvabsdp 7, 7 \n\t" //decrement n "addic. %[n], %[n], -32 \n\t" //Loop back if >0 - "bgt+ 1b \n\t" + "bgt+ one%= \n\t" //============================================================================== "xvcmpgtdp 2,44,45 \n\t " "xvcmpgtdp 3,46,47 \n\t " "xvcmpgtdp 4,48,49 \n\t " - "xvcmpgtdp 5,50,51 \n\t" + "xvcmpgtdp 5,6,7 \n\t" "xxsel 32,40,41,2 \n\t" "xxsel 0,44,45,2 \n\t" @@ -250,7 +250,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { "xxsel 34,40,41,4 \n\t" "xxsel 45,48,49,4 \n\t" "xxsel 35,42,43,5 \n\t" - "xxsel 47,50,51,5 \n\t" + "xxsel 47,6,7,5 \n\t" "xvcmpgtdp 2,0, 1 \n\t" "xvcmpgtdp 3, 45,47 \n\t" @@ -277,28 +277,28 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { ///////extract min value and min index from vector - "xxspltd 32,38,1 \n\t" - "xxspltd 40,39,1 \n\t" + XXSPLTD_S(32,38,1) + XXSPLTD_S(40,39,1) "xvcmpeqdp. 
2, 40,39 \n\t" //cr6 0 bit set if all true, cr6=4*6+bit_ind=24,0011at CR(BI)==1, at=10 hint that it occurs rarely //0b001110=14 - "bc 14,24, 3f \n\t" + "bc 14,24, three%= \n\t" "xvcmpgtdp 4,39, 40 \n\t" "xxsel 0,39,40,4 \n\t" "xxsel 1,38,32,4 \n\t" "stxsdx 0,0,%[ptr_minf] \n\t" - "b 4f \n\t" + "b four%= \n\t" - "3: \n\t" + "three%=: \n\t" //if elements value are equal then choose minimum index - "xxspltd 0,40,0 \n\t" + XXSPLTD_S(0,40,0) "vminud 0,0,6 \n\t" //vs32 vs38 "xxlor 1,32,32 \n\t" "stxsdx 0,0,%[ptr_minf] \n\t" - "4: \n\t" + "four%=: \n\t" "mfvsrd %[index],1 \n\t" : [minf] "=m"(*minf),[ptr_tmp] "+&b"(x),[index] "=r"(index), [n] "+&r"(n) @@ -307,7 +307,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) { [i64] "b"(64), [i80] "b"(80), [i96] "b"(96), [i112] "b"(112), [start] "v"(start), [adder] "v"(temp_add_index) : "cc", "vs0", "vs1","vs2","vs3", "vs4","vs5","vs32", "vs33", "vs34", "vs35", "vs36", - "vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs50", "vs51" + "vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs6", "vs7" ); return index; diff --git a/kernel/power/izamax.c b/kernel/power/izamax.c index cfe78c8c0..339c3ccde 100644 --- a/kernel/power/izamax.c +++ b/kernel/power/izamax.c @@ -56,8 +56,8 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) { "lxvd2x 47, %[i48],%[ptr_tmp] \n\t" "lxvd2x 48, %[i64],%[ptr_tmp] \n\t" "lxvd2x 49, %[i80],%[ptr_tmp] \n\t" - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" "xxlor 40,%x[start],%x[start] \n\t" //{ 1,0} vs40 | v8 "vaddudm 9,8,%[adder] \n\t" //{3,2} vs41 @@ -67,7 +67,7 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) { "vaddudm 11,10,%[adder] \n\t" //{7,6} vs43 "xxlxor 39,39,39 \n\t" // vs39 vec_max_value is zero "vaddudm 4,11, %[adder] \n\t" 
// {9,8} -{8;8} vs36 | v4 - "xxspltd 36,36,0 \n\t" + XXSPLTD_S(36,36,0) @@ -77,24 +77,24 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) { "xvabsdp 47, 47 \n\t" "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 \n\t" + "xvabsdp 6, 6 \n\t" + "xvabsdp 7, 7 \n\t" //jump first half forward - "b 2f \n\t" + "b two%= \n\t" - ".p2align 5 \n\t" - "1: \n\t" + ".align 5 \n\t" + "one%=: \n\t" - "xxmrghd 0,44,45 \n\t" - "xxmrgld 1,44,45 \n\t" - "xxmrghd 2,46,47 \n\t" - "xxmrgld 3,46,47 \n\t" - "xxmrghd 4,48,49 \n\t" - "xxmrgld 5,48,49 \n\t" - "xxmrghd 44,50,51 \n\t" - "xxmrgld 45,50,51 \n\t" + XXMRGHD_S(0,44,45) + XXMRGLD_S(1,44,45) + XXMRGHD_S(2,46,47) + XXMRGLD_S(3,46,47) + XXMRGHD_S(4,48,49) + XXMRGLD_S(5,48,49) + XXMRGHD_S(44,6,7) + XXMRGLD_S(45,6,7) "xvadddp 46, 0,1 \n\t" "xvadddp 47, 2,3 \n\t" @@ -103,15 +103,15 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) { - "xvcmpgtdp 50,47,46 \n\t " - "xvcmpgtdp 51,49,48 \n\t " + "xvcmpgtdp 6,47,46 \n\t " + "xvcmpgtdp 7,49,48 \n\t " "addi %[ptr_tmp] ,%[ptr_tmp] , 128 \n\t" - "xxsel 32,40,41,50 \n\t" - "xxsel 0,46,47,50 \n\t" - "xxsel 33,42,43,51 \n\t" - "xxsel 1,48,49,51 \n\t" + "xxsel 32,40,41,6 \n\t" + "xxsel 0,46,47,6 \n\t" + "xxsel 33,42,43,7 \n\t" + "xxsel 1,48,49,7 \n\t" "lxvd2x 44, 0,%[ptr_tmp] \n\t" "lxvd2x 45, %[i16],%[ptr_tmp] \n\t" @@ -133,8 +133,8 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) { "lxvd2x 48, %[i64],%[ptr_tmp] \n\t" "lxvd2x 49, %[i80],%[ptr_tmp] \n\t" - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" //select with previous "xxsel 38,38,32,4 \n\t" "xxsel 39,39,3,4 \n\t" @@ -148,35 +148,35 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) { "xvabsdp 47, 47 \n\t" "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 \n\t" + "xvabsdp 6, 6 \n\t" + "xvabsdp 
7, 7 \n\t" //>>/////////////////////////////// half start - "2: \n\t" - "xxmrghd 0,44,45 \n\t" - "xxmrgld 1,44,45 \n\t" - "xxmrghd 2,46,47 \n\t" - "xxmrgld 3,46,47 \n\t" - "xxmrghd 4,48,49 \n\t" - "xxmrgld 5,48,49 \n\t" - "xxmrghd 44,50,51 \n\t" - "xxmrgld 45,50,51 \n\t" + "two%=: \n\t" + XXMRGHD_S(0,44,45) + XXMRGLD_S(1,44,45) + XXMRGHD_S(2,46,47) + XXMRGLD_S(3,46,47) + XXMRGHD_S(4,48,49) + XXMRGLD_S(5,48,49) + XXMRGHD_S(44,6,7) + XXMRGLD_S(45,6,7) "xvadddp 46, 0,1 \n\t" "xvadddp 47, 2,3 \n\t" "xvadddp 48, 4,5 \n\t" "xvadddp 49, 44,45 \n\t" - "xvcmpgtdp 50,47,46 \n\t " - "xvcmpgtdp 51,49,48 \n\t " + "xvcmpgtdp 6,47,46 \n\t " + "xvcmpgtdp 7,49,48 \n\t " "addi %[ptr_tmp] ,%[ptr_tmp] , 128 \n\t" - "xxsel 32,40,41,50 \n\t" - "xxsel 0,46,47,50 \n\t" - "xxsel 33,42,43,51 \n\t" - "xxsel 1,48,49,51 \n\t" + "xxsel 32,40,41,6 \n\t" + "xxsel 0,46,47,6 \n\t" + "xxsel 33,42,43,7 \n\t" + "xxsel 1,48,49,7 \n\t" "lxvd2x 44, 0,%[ptr_tmp] \n\t" "lxvd2x 45, %[i16],%[ptr_tmp] \n\t" @@ -198,8 +198,8 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) { "lxvd2x 48, %[i64],%[ptr_tmp] \n\t" "lxvd2x 49, %[i80],%[ptr_tmp] \n\t" - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" //select with previous "xxsel 38,38,32,4 \n\t" "xxsel 39,39,3,4 \n\t" @@ -211,24 +211,24 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) { "xvabsdp 47, 47 \n\t" "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 \n\t" + "xvabsdp 6, 6 \n\t" + "xvabsdp 7, 7 \n\t" //decrement n "addic. 
%[n], %[n], -16 \n\t" //Loop back if >0 - "bgt+ 1b \n\t" + "bgt+ one%= \n\t" - "xxmrghd 0,44,45 \n\t" - "xxmrgld 1,44,45 \n\t" - "xxmrghd 2,46,47 \n\t" - "xxmrgld 3,46,47 \n\t" - "xxmrghd 4,48,49 \n\t" - "xxmrgld 5,48,49 \n\t" - "xxmrghd 44,50,51 \n\t" - "xxmrgld 45,50,51 \n\t" + XXMRGHD_S(0,44,45) + XXMRGLD_S(1,44,45) + XXMRGHD_S(2,46,47) + XXMRGLD_S(3,46,47) + XXMRGHD_S(4,48,49) + XXMRGLD_S(5,48,49) + XXMRGHD_S(44,6,7) + XXMRGLD_S(45,6,7) "xvadddp 46, 0,1 \n\t" "xvadddp 47, 2,3 \n\t" @@ -237,13 +237,13 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) { - "xvcmpgtdp 50,47,46 \n\t " - "xvcmpgtdp 51,49,48 \n\t " + "xvcmpgtdp 6,47,46 \n\t " + "xvcmpgtdp 7,49,48 \n\t " - "xxsel 32,40,41,50 \n\t" - "xxsel 0,46,47,50 \n\t" - "xxsel 33,42,43,51 \n\t" - "xxsel 1,48,49,51 \n\t" + "xxsel 32,40,41,6 \n\t" + "xxsel 0,46,47,6 \n\t" + "xxsel 33,42,43,7 \n\t" + "xxsel 1,48,49,7 \n\t" "xvcmpgtdp 2,1,0 \n\t " "xxsel 32,32,33,2 \n\t" @@ -262,28 +262,28 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) { ///////extract max value and max index from vector - "xxspltd 32,38,1 \n\t" - "xxspltd 40,39,1 \n\t" + XXSPLTD_S(32,38,1) + XXSPLTD_S(40,39,1) "xvcmpeqdp. 
2, 40,39 \n\t" //cr6 0 bit set if all true, cr6=4*6+bit_ind=24,0011at CR(BI)==1, at=10 hint that it occurs rarely //0b001110=14 - "bc 14,24, 3f \n\t" + "bc 14,24, three%= \n\t" "xvcmpgtdp 4, 40,39 \n\t" "xxsel 0,39,40,4 \n\t" "xxsel 1,38,32,4 \n\t" "stxsdx 0,0,%[ptr_maxf] \n\t" - "b 4f \n\t" + "b four%= \n\t" - "3: \n\t" + "three%=: \n\t" //if elements value are equal then choose minimum index - "xxspltd 0,40,0 \n\t" + XXSPLTD_S(0,40,0) "vminud 0,0,6 \n\t" //vs32 vs38 "xxlor 1,32,32 \n\t" "stxsdx 0,0,%[ptr_maxf] \n\t" - "4: \n\t" + "four%=: \n\t" "mfvsrd %[index],1 \n\t" : [maxf] "=m"(*maxf),[ptr_tmp] "+&b"(x),[index] "=r"(index), [n] "+&r"(n) @@ -292,7 +292,7 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) { [i64] "b"(64), [i80] "b"(80), [i96] "b"(96), [i112] "b"(112), [start] "v"(start), [adder] "v"(temp_add_index) : "cc", "vs0", "vs1","vs2","vs3", "vs4","vs5","vs32", "vs33", "vs34", "vs35", "vs36", - "vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs50", "vs51" + "vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs6", "vs7" ); return index; diff --git a/kernel/power/izamin.c b/kernel/power/izamin.c index 1ffa3ba8b..6d0d15547 100644 --- a/kernel/power/izamin.c +++ b/kernel/power/izamin.c @@ -54,8 +54,8 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) { "lxvd2x 47, %[i48],%[ptr_tmp] \n\t" "lxvd2x 48, %[i64],%[ptr_tmp] \n\t" "lxvd2x 49, %[i80],%[ptr_tmp] \n\t" - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" "xxlor 40,%x[start],%x[start] \n\t" //{ 1,0} vs40 | v8 "vaddudm 9,8,%[adder] \n\t" //{3,2} vs41 @@ -65,7 +65,7 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) { "vaddudm 11,10,%[adder] \n\t" //{7,6} vs43 "lxvdsx 39,0,%[ptr_minf] \n\t" // vs39 vec_min_value "vaddudm 4,11, 
%[adder] \n\t" // {9,8} -{8;8} vs36 | v4 - "xxspltd 36,36,0 \n\t" + XXSPLTD_S(36,36,0) @@ -75,24 +75,24 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) { "xvabsdp 47, 47 \n\t" "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 \n\t" + "xvabsdp 6, 6 \n\t" + "xvabsdp 7, 7 \n\t" //jump first half forward - "b 2f \n\t" + "b two%= \n\t" - ".p2align 5 \n\t" - "1: \n\t" + ".align 5 \n\t" + "one%=: \n\t" - "xxmrghd 0,44,45 \n\t" - "xxmrgld 1,44,45 \n\t" - "xxmrghd 2,46,47 \n\t" - "xxmrgld 3,46,47 \n\t" - "xxmrghd 4,48,49 \n\t" - "xxmrgld 5,48,49 \n\t" - "xxmrghd 44,50,51 \n\t" - "xxmrgld 45,50,51 \n\t" + XXMRGHD_S(0,44,45) + XXMRGLD_S(1,44,45) + XXMRGHD_S(2,46,47) + XXMRGLD_S(3,46,47) + XXMRGHD_S(4,48,49) + XXMRGLD_S(5,48,49) + XXMRGHD_S(44,6,7) + XXMRGLD_S(45,6,7) "xvadddp 46, 0,1 \n\t" "xvadddp 47, 2,3 \n\t" @@ -101,15 +101,15 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) { - "xvcmpgtdp 50,46,47 \n\t " - "xvcmpgtdp 51,48,49 \n\t " + "xvcmpgtdp 6,46,47 \n\t " + "xvcmpgtdp 7,48,49 \n\t " "addi %[ptr_tmp] ,%[ptr_tmp] , 128 \n\t" - "xxsel 32,40,41,50 \n\t" - "xxsel 0,46,47,50 \n\t" - "xxsel 33,42,43,51 \n\t" - "xxsel 1,48,49,51 \n\t" + "xxsel 32,40,41,6 \n\t" + "xxsel 0,46,47,6 \n\t" + "xxsel 33,42,43,7 \n\t" + "xxsel 1,48,49,7 \n\t" "lxvd2x 44, 0,%[ptr_tmp] \n\t" "lxvd2x 45, %[i16],%[ptr_tmp] \n\t" @@ -131,8 +131,8 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) { "lxvd2x 48, %[i64],%[ptr_tmp] \n\t" "lxvd2x 49, %[i80],%[ptr_tmp] \n\t" - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" //select with previous "xxsel 38,38,32,4 \n\t" "xxsel 39,39,3,4 \n\t" @@ -146,35 +146,35 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) { "xvabsdp 47, 47 \n\t" "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 
\n\t" + "xvabsdp 6, 6 \n\t" + "xvabsdp 7, 7 \n\t" //>>/////////////////////////////// half start - "2: \n\t" - "xxmrghd 0,44,45 \n\t" - "xxmrgld 1,44,45 \n\t" - "xxmrghd 2,46,47 \n\t" - "xxmrgld 3,46,47 \n\t" - "xxmrghd 4,48,49 \n\t" - "xxmrgld 5,48,49 \n\t" - "xxmrghd 44,50,51 \n\t" - "xxmrgld 45,50,51 \n\t" + "two%=: \n\t" + XXMRGHD_S(0,44,45) + XXMRGLD_S(1,44,45) + XXMRGHD_S(2,46,47) + XXMRGLD_S(3,46,47) + XXMRGHD_S(4,48,49) + XXMRGLD_S(5,48,49) + XXMRGHD_S(44,6,7) + XXMRGLD_S(45,6,7) "xvadddp 46, 0,1 \n\t" "xvadddp 47, 2,3 \n\t" "xvadddp 48, 4,5 \n\t" "xvadddp 49, 44,45 \n\t" - "xvcmpgtdp 50,46,47 \n\t " - "xvcmpgtdp 51,48,49 \n\t " + "xvcmpgtdp 6,46,47 \n\t " + "xvcmpgtdp 7,48,49 \n\t " "addi %[ptr_tmp] ,%[ptr_tmp] , 128 \n\t" - "xxsel 32,40,41,50 \n\t" - "xxsel 0,46,47,50 \n\t" - "xxsel 33,42,43,51 \n\t" - "xxsel 1,48,49,51 \n\t" + "xxsel 32,40,41,6 \n\t" + "xxsel 0,46,47,6 \n\t" + "xxsel 33,42,43,7 \n\t" + "xxsel 1,48,49,7 \n\t" "lxvd2x 44, 0,%[ptr_tmp] \n\t" "lxvd2x 45, %[i16],%[ptr_tmp] \n\t" @@ -196,8 +196,8 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) { "lxvd2x 48, %[i64],%[ptr_tmp] \n\t" "lxvd2x 49, %[i80],%[ptr_tmp] \n\t" - "lxvd2x 50, %[i96],%[ptr_tmp] \n\t" - "lxvd2x 51,%[i112],%[ptr_tmp] \n\t" + "lxvd2x 6, %[i96],%[ptr_tmp] \n\t" + "lxvd2x 7,%[i112],%[ptr_tmp] \n\t" //select with previous "xxsel 38,38,32,4 \n\t" "xxsel 39,39,3,4 \n\t" @@ -209,24 +209,24 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) { "xvabsdp 47, 47 \n\t" "xvabsdp 48, 48 \n\t" "xvabsdp 49, 49 \n\t" - "xvabsdp 50, 50 \n\t" - "xvabsdp 51, 51 \n\t" + "xvabsdp 6, 6 \n\t" + "xvabsdp 7, 7 \n\t" //decrement n "addic. 
%[n], %[n], -16 \n\t" //Loop back if >0 - "bgt+ 1b \n\t" + "bgt+ one%= \n\t" - "xxmrghd 0,44,45 \n\t" - "xxmrgld 1,44,45 \n\t" - "xxmrghd 2,46,47 \n\t" - "xxmrgld 3,46,47 \n\t" - "xxmrghd 4,48,49 \n\t" - "xxmrgld 5,48,49 \n\t" - "xxmrghd 44,50,51 \n\t" - "xxmrgld 45,50,51 \n\t" + XXMRGHD_S(0,44,45) + XXMRGLD_S(1,44,45) + XXMRGHD_S(2,46,47) + XXMRGLD_S(3,46,47) + XXMRGHD_S(4,48,49) + XXMRGLD_S(5,48,49) + XXMRGHD_S(44,6,7) + XXMRGLD_S(45,6,7) "xvadddp 46, 0,1 \n\t" "xvadddp 47, 2,3 \n\t" @@ -235,13 +235,13 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) { - "xvcmpgtdp 50,46,47 \n\t " - "xvcmpgtdp 51,48,49 \n\t " + "xvcmpgtdp 6,46,47 \n\t " + "xvcmpgtdp 7,48,49 \n\t " - "xxsel 32,40,41,50 \n\t" - "xxsel 0,46,47,50 \n\t" - "xxsel 33,42,43,51 \n\t" - "xxsel 1,48,49,51 \n\t" + "xxsel 32,40,41,6 \n\t" + "xxsel 0,46,47,6 \n\t" + "xxsel 33,42,43,7 \n\t" + "xxsel 1,48,49,7 \n\t" "xvcmpgtdp 2,0,1 \n\t " "xxsel 32,32,33,2 \n\t" @@ -260,28 +260,28 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) { ///////extract min value and min index from vector - "xxspltd 32,38,1 \n\t" - "xxspltd 40,39,1 \n\t" + XXSPLTD_S(32,38,1) + XXSPLTD_S(40,39,1) "xvcmpeqdp. 
2, 40,39 \n\t" //cr6 0 bit set if all true, cr6=4*6+bit_ind=24,0011at CR(BI)==1, at=10 hint that it occurs rarely //0b001110=14 - "bc 14,24, 3f \n\t" + "bc 14,24, three%= \n\t" "xvcmpgtdp 4,39, 40 \n\t" "xxsel 0,39,40,4 \n\t" "xxsel 1,38,32,4 \n\t" "stxsdx 0,0,%[ptr_minf] \n\t" - "b 4f \n\t" + "b four%= \n\t" - "3: \n\t" + "three%=: \n\t" //if elements value are equal then choose minimum index - "xxspltd 0,40,0 \n\t" + XXSPLTD_S(0,40,0) "vminud 0,0,6 \n\t" //vs32 vs38 "xxlor 1,32,32 \n\t" "stxsdx 0,0,%[ptr_minf] \n\t" - "4: \n\t" + "four%=: \n\t" "mfvsrd %[index],1 \n\t" : [minf] "=m"(*minf),[ptr_tmp] "+&b"(x),[index] "=r"(index), [n] "+&r"(n) @@ -290,7 +290,7 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) { [i64] "b"(64), [i80] "b"(80), [i96] "b"(96), [i112] "b"(112), [start] "v"(start), [adder] "v"(temp_add_index) : "cc", "vs0", "vs1","vs2","vs3", "vs4","vs5","vs32", "vs33", "vs34", "vs35", "vs36", - "vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs50", "vs51" + "vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs6", "vs7" ); return index; diff --git a/kernel/power/lock.c b/kernel/power/lock.c index 51348d63c..1c1b006b0 100644 --- a/kernel/power/lock.c +++ b/kernel/power/lock.c @@ -46,10 +46,10 @@ static void __inline blas_lock(volatile BLASULONG *address){ " .machine \"any\" ;" "0: lwarx %0,0, %1 ;" " cmpwi 0,%0,0;" - " bne 1f;" + " bne one%=;" " stwcx. %2,0, %1 ;" " bne- 0b;" - "1: " + "one%=: " : "=&r"(ret) : "r"(address), "r" (val) : "cr0", "memory"); diff --git a/kernel/power/sasum_microk_power8.c b/kernel/power/sasum_microk_power8.c index 4bb515de8..aa465c38e 100644 --- a/kernel/power/sasum_microk_power8.c +++ b/kernel/power/sasum_microk_power8.c @@ -68,10 +68,10 @@ static float sasum_kernel_32 (long n, float *x) "addi %2, %2, 128 \n\t" "addic. 
%1, %1, -32 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvabssp 48, 40 \n\t" "xvabssp 49, 41 \n\t" @@ -108,9 +108,9 @@ static float sasum_kernel_32 (long n, float *x) "xvaddsp 38, 38, %x5 \n\t" "xvaddsp 39, 39, %x6 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvabssp 48, 40 \n\t" "xvabssp 49, 41 \n\t" diff --git a/kernel/power/scopy_microk_power8.c b/kernel/power/scopy_microk_power8.c index 7a54d5e1e..da39789b1 100644 --- a/kernel/power/scopy_microk_power8.c +++ b/kernel/power/scopy_microk_power8.c @@ -51,10 +51,10 @@ static void scopy_kernel_32 (long n, float *x, float *y) "addi %2, %2, 128 \n\t" "addic. %1, %1, -32 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "stxvd2x 40, 0, %3 \n\t" "stxvd2x 41, %5, %3 \n\t" @@ -77,9 +77,9 @@ static void scopy_kernel_32 (long n, float *x, float *y) "addi %2, %2, 128 \n\t" "addic. %1, %1, -32 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "stxvd2x 40, 0, %3 \n\t" "stxvd2x 41, %5, %3 \n\t" diff --git a/kernel/power/sdot_microk_power8.c b/kernel/power/sdot_microk_power8.c index bfe100c8b..a8db6a8d6 100644 --- a/kernel/power/sdot_microk_power8.c +++ b/kernel/power/sdot_microk_power8.c @@ -78,10 +78,10 @@ static float sdot_kernel_16 (long n, float *x, float *y) "addi %3, %3, 128 \n\t" "addic. %1, %1, -32 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvmaddasp 32, 40, 48 \n\t" "lxvd2x 40, 0, %2 \n\t" @@ -112,9 +112,9 @@ static float sdot_kernel_16 (long n, float *x, float *y) "addi %3, %3, 128 \n\t" "addic. 
%1, %1, -32 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvmaddasp 32, 40, 48 \n\t" "xvmaddasp 33, 41, 49 \n\t" diff --git a/kernel/power/sgemm_macros_16x8_power8.S b/kernel/power/sgemm_macros_16x8_power8.S index 98414857f..9bcfca827 100644 --- a/kernel/power/sgemm_macros_16x8_power8.S +++ b/kernel/power/sgemm_macros_16x8_power8.S @@ -38,7 +38,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=8 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD8x16_1', ` +#else .macro LOAD8x16_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -63,9 +67,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 128 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_I1', ` +#else .macro KERNEL8x16_I1 +#endif lxvw4x vs4, o0, AO @@ -133,9 +145,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs63, vs3, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_1', ` +#else .macro KERNEL8x16_1 +#endif lxvw4x vs4, o0, AO @@ -203,9 +223,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs3, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_2', ` +#else .macro KERNEL8x16_2 +#endif lxvw4x vs0, o0, AO @@ -273,9 +301,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs7, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_E2', ` +#else .macro KERNEL8x16_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -319,9 +355,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs63, vs7, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_SUBI1', ` +#else .macro KERNEL8x16_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -389,9 +433,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs63, vs3, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_SUB1', ` +#else .macro KERNEL8x16_SUB1 +#endif lxvw4x vs0, o0, AO @@ -459,9 +511,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs3, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE8x16', ` +#else .macro SAVE8x16 +#endif mr T1, CO @@ -698,14 +758,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=8 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD8x8_1', ` +#else .macro LOAD8x8_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -728,9 +796,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 128 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_I1', ` +#else .macro KERNEL8x8_I1 +#endif lxvw4x vs4, o0, AO @@ -780,9 +856,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_1', ` +#else .macro KERNEL8x8_1 +#endif lxvw4x vs4, o0, AO @@ -832,9 +916,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_2', ` +#else .macro KERNEL8x8_2 +#endif lxvw4x vs0, o0, AO @@ -884,9 +976,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs5, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_E2', ` +#else .macro KERNEL8x8_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -914,9 +1014,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs5, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_SUBI1', ` +#else .macro KERNEL8x8_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -966,9 +1074,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_SUB1', ` +#else .macro KERNEL8x8_SUB1 +#endif lxvw4x vs0, o0, AO @@ -1018,9 +1134,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE8x8', ` +#else .macro SAVE8x8 +#endif mr T1, CO @@ -1193,14 +1317,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=8 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD8x4_1', ` +#else .macro LOAD8x4_1 +#endif lxvw4x vs0, o0, AO @@ -1222,9 +1354,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 128 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_I1', ` +#else .macro KERNEL8x4_I1 +#endif lxvw4x vs4, o0, AO @@ -1265,9 +1405,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_1', ` +#else .macro KERNEL8x4_1 +#endif lxvw4x vs4, o0, AO @@ -1308,9 +1456,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_2', ` +#else .macro KERNEL8x4_2 +#endif lxvw4x vs0, o0, AO @@ -1351,9 +1507,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs4, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_E2', ` +#else .macro KERNEL8x4_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -1373,9 +1537,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs4, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_SUBI1', ` +#else .macro KERNEL8x4_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -1416,9 +1588,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_SUB1', ` +#else .macro KERNEL8x4_SUB1 +#endif lxvw4x vs0, o0, AO @@ -1459,9 +1639,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE8x4', ` +#else .macro SAVE8x4 +#endif mr T1, CO @@ -1602,14 +1790,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=8 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD8x2_1', ` +#else .macro LOAD8x2_1 +#endif lxsspx vs0, o0, AO lxsspx vs1, o4, AO @@ -1633,9 +1829,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 128 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_I1', ` +#else .macro KERNEL8x2_I1 +#endif lxsspx vs4, o0, AO @@ -1686,9 +1890,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_1', ` +#else .macro KERNEL8x2_1 +#endif lxsspx vs4, o0, AO @@ -1739,9 +1951,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_2', ` +#else .macro KERNEL8x2_2 +#endif lxsspx vs0, o0, AO @@ -1792,9 +2012,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs5, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_E2', ` +#else .macro KERNEL8x2_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -1822,9 +2050,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs5, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_SUBI1', ` +#else .macro KERNEL8x2_SUBI1 +#endif lxsspx vs0, o0, AO @@ -1875,9 +2111,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_SUB1', ` +#else .macro KERNEL8x2_SUB1 +#endif lxsspx vs0, o0, AO @@ -1928,9 +2172,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE8x2', ` +#else .macro SAVE8x2 +#endif mr T1, CO @@ -2103,14 +2355,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=8 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD8x1_1', ` +#else .macro LOAD8x1_1 +#endif lxsspx vs0, o0, AO @@ -2133,9 +2393,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 128 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_I1', ` +#else .macro KERNEL8x1_I1 +#endif lxsspx vs4, o0, AO @@ -2177,9 +2445,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_1', ` +#else .macro KERNEL8x1_1 +#endif lxsspx vs4, o0, AO @@ -2221,9 +2497,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_2', ` +#else .macro KERNEL8x1_2 +#endif lxsspx vs0, o0, AO @@ -2265,9 +2549,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmaddadp vs39, vs4, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_E2', ` +#else .macro KERNEL8x1_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -2287,9 +2579,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs4, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_SUBI1', ` +#else .macro KERNEL8x1_SUBI1 +#endif lxsspx vs0, o0, AO @@ -2331,9 +2631,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_SUB1', ` +#else .macro KERNEL8x1_SUB1 +#endif lxsspx vs0, o0, AO @@ -2375,9 +2683,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE8x1', ` +#else .macro SAVE8x1 +#endif mr T1, CO @@ -2518,14 +2834,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 4 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x16_1', ` +#else .macro LOAD4x16_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -2543,9 +2867,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_I1', ` +#else .macro KERNEL4x16_I1 +#endif lxvw4x vs4, o0, AO @@ -2586,9 +2918,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs47, vs3, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_1', ` +#else .macro KERNEL4x16_1 +#endif lxvw4x vs4, o0, AO @@ -2629,9 +2969,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs3, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_2', ` +#else .macro KERNEL4x16_2 +#endif lxvw4x vs0, o0, AO @@ -2672,9 +3020,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs7, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_E2', ` +#else .macro KERNEL4x16_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -2698,9 +3054,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs7, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_SUBI1', ` +#else .macro KERNEL4x16_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -2741,9 +3105,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs3, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_SUB1', ` +#else .macro KERNEL4x16_SUB1 +#endif lxvw4x vs0, o0, AO @@ -2784,9 +3156,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs3, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x16', ` +#else .macro SAVE4x16 +#endif mr T1, CO @@ -2907,14 +3287,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x8_1', ` +#else .macro LOAD4x8_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -2930,9 +3318,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_I1', ` +#else .macro KERNEL4x8_I1 +#endif lxvw4x vs4, o0, AO @@ -2963,9 +3359,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_1', ` +#else .macro KERNEL4x8_1 +#endif lxvw4x vs4, o0, AO @@ -2996,9 +3400,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_2', ` +#else .macro KERNEL4x8_2 +#endif lxvw4x vs0, o0, AO @@ -3029,9 +3441,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs5, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_E2', ` +#else .macro KERNEL4x8_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -3047,9 +3467,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs5, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUBI1', ` +#else .macro KERNEL4x8_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -3080,9 +3508,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUB1', ` +#else .macro KERNEL4x8_SUB1 +#endif lxvw4x vs0, o0, AO @@ -3113,9 +3549,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x8', ` +#else .macro SAVE4x8 +#endif mr T1, CO @@ -3204,14 +3648,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x4_1', ` +#else .macro LOAD4x4_1 +#endif lxvw4x vs0, o0, AO @@ -3226,9 +3678,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_I1', ` +#else .macro KERNEL4x4_I1 +#endif lxvw4x vs4, o0, AO @@ -3254,9 +3714,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_1', ` +#else .macro KERNEL4x4_1 +#endif lxvw4x vs4, o0, AO @@ -3282,9 +3750,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_2', ` +#else .macro KERNEL4x4_2 +#endif lxvw4x vs0, o0, AO @@ -3310,9 +3786,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs35, vs4, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_E2', ` +#else .macro KERNEL4x4_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -3324,9 +3808,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs4, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUBI1', ` +#else .macro KERNEL4x4_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -3352,9 +3844,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUB1', ` +#else .macro KERNEL4x4_SUB1 +#endif lxvw4x vs0, o0, AO @@ -3380,9 +3880,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x4', ` +#else .macro SAVE4x4 +#endif mr T1, CO @@ -3455,14 +3963,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x2_1', ` +#else .macro LOAD4x2_1 +#endif lxsspx vs0, o0, AO lxsspx vs1, o4, AO @@ -3479,9 +3995,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_I1', ` +#else .macro KERNEL4x2_I1 +#endif lxsspx vs4, o0, AO @@ -3513,9 +4037,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_1', ` +#else .macro KERNEL4x2_1 +#endif lxsspx vs4, o0, AO @@ -3547,9 +4079,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_2', ` +#else .macro KERNEL4x2_2 +#endif lxsspx vs0, o0, AO @@ -3581,9 +4121,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs5, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_E2', ` +#else .macro KERNEL4x2_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -3599,9 +4147,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs5, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUBI1', ` +#else .macro KERNEL4x2_SUBI1 +#endif lxsspx vs0, o0, AO @@ -3633,9 +4189,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUB1', ` +#else .macro KERNEL4x2_SUB1 +#endif lxsspx vs0, o0, AO @@ -3667,9 +4231,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x2', ` +#else .macro SAVE4x2 +#endif mr T1, CO @@ -3758,14 +4330,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x1_1', ` +#else .macro LOAD4x1_1 +#endif lxsspx vs0, o0, AO @@ -3781,9 +4361,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_I1', ` +#else .macro KERNEL4x1_I1 +#endif lxsspx vs4, o0, AO @@ -3810,9 +4398,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_1', ` +#else .macro KERNEL4x1_1 +#endif lxsspx vs4, o0, AO @@ -3839,9 +4435,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_2', ` +#else .macro KERNEL4x1_2 +#endif lxsspx vs0, o0, AO @@ -3868,9 +4472,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs4, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_E2', ` +#else .macro KERNEL4x1_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -3882,9 +4494,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs4, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUBI1', ` +#else .macro KERNEL4x1_SUBI1 +#endif lxsspx vs0, o0, AO @@ -3911,9 +4531,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUB1', ` +#else .macro KERNEL4x1_SUB1 +#endif lxsspx vs0, o0, AO @@ -3940,9 +4568,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x1', ` +#else .macro SAVE4x1 +#endif mr T1, CO @@ -4015,14 +4651,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 4 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x16_1', ` +#else .macro LOAD2x16_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -4038,9 +4682,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_I1', ` +#else .macro KERNEL2x16_I1 +#endif lxvw4x vs4, o0, AO @@ -4069,9 +4721,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs3, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_1', ` +#else .macro KERNEL2x16_1 +#endif lxvw4x vs4, o0, AO @@ -4100,9 +4760,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs3, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_2', ` +#else .macro KERNEL2x16_2 +#endif lxvw4x vs0, o0, AO @@ -4131,9 +4799,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs39, vs7, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_E2', ` +#else .macro KERNEL2x16_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -4147,9 +4823,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs7, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_SUBI1', ` +#else .macro KERNEL2x16_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -4178,9 +4862,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs3, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_SUB1', ` +#else .macro KERNEL2x16_SUB1 +#endif lxvw4x vs0, o0, AO @@ -4209,9 +4901,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs3, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x16', ` +#else .macro SAVE2x16 +#endif mr T1, CO @@ -4274,14 +4974,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x8_1', ` +#else .macro LOAD2x8_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -4295,9 +5003,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_I1', ` +#else .macro KERNEL2x8_I1 +#endif lxvw4x vs4, o0, AO @@ -4320,9 +5036,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_1', ` +#else .macro KERNEL2x8_1 +#endif lxvw4x vs4, o0, AO @@ -4345,9 +5069,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_2', ` +#else .macro KERNEL2x8_2 +#endif lxvw4x vs0, o0, AO @@ -4370,9 +5102,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs5, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_E2', ` +#else .macro KERNEL2x8_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -4382,9 +5122,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs5, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUBI1', ` +#else .macro KERNEL2x8_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -4407,9 +5155,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUB1', ` +#else .macro KERNEL2x8_SUB1 +#endif lxvw4x vs0, o0, AO @@ -4432,9 +5188,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x8', ` +#else .macro SAVE2x8 +#endif mr T1, CO @@ -4481,14 +5245,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x4_1', ` +#else .macro LOAD2x4_1 +#endif lxvw4x vs0, o0, AO @@ -4501,9 +5273,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_I1', ` +#else .macro KERNEL2x4_I1 +#endif lxvw4x vs4, o0, AO @@ -4523,9 +5303,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_1', ` +#else .macro KERNEL2x4_1 +#endif lxvw4x vs4, o0, AO @@ -4545,9 +5333,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_2', ` +#else .macro KERNEL2x4_2 +#endif lxvw4x vs0, o0, AO @@ -4567,9 +5363,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs4, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_E2', ` +#else .macro KERNEL2x4_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -4577,9 +5381,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs4, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUBI1', ` +#else .macro KERNEL2x4_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -4599,9 +5411,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUB1', ` +#else .macro KERNEL2x4_SUB1 +#endif lxvw4x vs0, o0, AO @@ -4621,9 +5441,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x4', ` +#else .macro SAVE2x4 +#endif mr T1, CO @@ -4662,14 +5490,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x2_1', ` +#else .macro LOAD2x2_1 +#endif lxsspx vs0, o0, AO lxsspx vs1, o4, AO @@ -4684,9 +5520,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_I1', ` +#else .macro KERNEL2x2_I1 +#endif lxsspx vs4, o0, AO @@ -4710,9 +5554,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_1', ` +#else .macro KERNEL2x2_1 +#endif lxsspx vs4, o0, AO @@ -4736,9 +5588,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_2', ` +#else .macro KERNEL2x2_2 +#endif lxsspx vs0, o0, AO @@ -4762,9 +5622,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmaddadp vs35, vs5, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_E2', ` +#else .macro KERNEL2x2_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -4774,9 +5642,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs5, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUBI1', ` +#else .macro KERNEL2x2_SUBI1 +#endif lxsspx vs0, o0, AO @@ -4800,9 +5676,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUB1', ` +#else .macro KERNEL2x2_SUB1 +#endif lxsspx vs0, o0, AO @@ -4826,9 +5710,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x2', ` +#else .macro SAVE2x2 +#endif mr T1, CO @@ -4875,14 +5767,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x1_1', ` +#else .macro LOAD2x1_1 +#endif lxsspx vs0, o0, AO @@ -4896,9 +5796,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_I1', ` +#else .macro KERNEL2x1_I1 +#endif lxsspx vs4, o0, AO @@ -4919,9 +5827,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_1', ` +#else .macro KERNEL2x1_1 +#endif lxsspx vs4, o0, AO @@ -4942,9 +5858,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_2', ` +#else .macro KERNEL2x1_2 +#endif lxsspx vs0, o0, AO @@ -4965,9 +5889,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs4, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_E2', ` +#else .macro KERNEL2x1_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -4975,9 +5907,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs4, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUBI1', ` +#else .macro KERNEL2x1_SUBI1 +#endif lxsspx vs0, o0, AO @@ -4998,9 +5938,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUB1', ` +#else .macro KERNEL2x1_SUB1 +#endif lxsspx vs0, o0, AO @@ -5021,9 +5969,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x1', ` +#else .macro SAVE2x1 +#endif mr T1, CO @@ -5062,14 +6018,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 4 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x16_1', ` +#else .macro LOAD1x16_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -5084,9 +6048,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_I1', ` +#else .macro KERNEL1x16_I1 +#endif lxvw4x vs4, o0, AO @@ -5109,9 +6081,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs3, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_1', ` +#else .macro KERNEL1x16_1 +#endif lxvw4x vs4, o0, AO @@ -5134,9 +6114,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs3, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_2', ` +#else .macro KERNEL1x16_2 +#endif lxvw4x vs0, o0, AO @@ -5159,9 +6147,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs7, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_E2', ` +#else .macro KERNEL1x16_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -5170,9 +6166,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs7, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_SUBI1', ` +#else .macro KERNEL1x16_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -5195,9 +6199,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs35, vs3, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_SUB1', ` +#else .macro KERNEL1x16_SUB1 +#endif lxvw4x vs0, o0, AO @@ -5220,9 +6232,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs3, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x16', ` +#else .macro SAVE1x16 +#endif mr T1, CO @@ -5256,14 +6276,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x8_1', ` +#else .macro LOAD1x8_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -5276,9 +6304,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_I1', ` +#else .macro KERNEL1x8_I1 +#endif lxvw4x vs4, o0, AO @@ -5297,9 +6333,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_1', ` +#else .macro KERNEL1x8_1 +#endif lxvw4x vs4, o0, AO @@ -5318,9 +6362,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_2', ` +#else .macro KERNEL1x8_2 +#endif lxvw4x vs0, o0, AO @@ -5339,18 +6391,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs33, vs5, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_E2', ` +#else .macro KERNEL1x8_E2 +#endif xvmaddasp vs32, vs4, vs16 xvmaddasp vs33, vs5, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUBI1', ` +#else .macro KERNEL1x8_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -5369,9 +6437,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUB1', ` +#else .macro KERNEL1x8_SUB1 +#endif lxvw4x vs0, o0, AO @@ -5390,9 +6466,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x8', ` +#else .macro SAVE1x8 +#endif mr T1, CO @@ -5418,14 +6502,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x4_1', ` +#else .macro LOAD1x4_1 +#endif lxvw4x vs0, o0, AO @@ -5437,9 +6529,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_I1', ` +#else .macro KERNEL1x4_I1 +#endif lxvw4x vs4, o0, AO @@ -5456,9 +6556,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_1', ` +#else .macro KERNEL1x4_1 +#endif lxvw4x vs4, o0, AO @@ -5475,9 +6583,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_2', ` +#else .macro KERNEL1x4_2 +#endif lxvw4x vs0, o0, AO @@ -5494,17 +6610,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs32, vs4, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_E2', ` +#else .macro KERNEL1x4_E2 +#endif xvmaddasp vs32, vs4, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUBI1', ` +#else .macro KERNEL1x4_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -5521,9 +6653,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUB1', ` +#else .macro KERNEL1x4_SUB1 +#endif lxvw4x vs0, o0, AO @@ -5540,9 +6680,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x4', ` +#else .macro SAVE1x4 +#endif mr T1, CO @@ -5564,14 +6712,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x2_1', ` +#else .macro LOAD1x2_1 +#endif lxsspx vs0, o0, AO lxsspx vs1, o4, AO @@ -5585,9 +6741,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_I1', ` +#else .macro KERNEL1x2_I1 +#endif lxsspx vs4, o0, AO @@ -5607,9 +6771,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_1', ` +#else .macro KERNEL1x2_1 +#endif lxsspx vs4, o0, AO @@ -5629,9 +6801,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_2', ` +#else .macro KERNEL1x2_2 +#endif lxsspx vs0, o0, AO @@ -5651,18 +6831,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs5, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_E2', ` +#else .macro KERNEL1x2_E2 +#endif xsmaddadp vs32, vs4, vs16 xsmaddadp vs33, vs5, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUBI1', ` +#else .macro KERNEL1x2_SUBI1 +#endif lxsspx vs0, o0, AO @@ -5682,9 +6878,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUB1', ` +#else .macro KERNEL1x2_SUB1 +#endif lxsspx vs0, o0, AO @@ -5704,9 +6908,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x2', ` +#else .macro SAVE1x2 +#endif mr T1, CO @@ -5732,14 +6944,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x1_1', ` +#else .macro LOAD1x1_1 +#endif lxsspx vs0, o0, AO @@ -5752,9 +6972,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_I1', ` +#else .macro KERNEL1x1_I1 +#endif lxsspx vs4, o0, AO @@ -5772,9 +7000,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_1', ` +#else .macro KERNEL1x1_1 +#endif lxsspx vs4, o0, AO @@ -5792,9 +7028,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_2', ` +#else .macro KERNEL1x1_2 +#endif lxsspx vs0, o0, AO @@ -5812,17 +7056,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs32, vs4, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_E2', ` +#else .macro KERNEL1x1_E2 +#endif xsmaddadp vs32, vs4, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUBI1', ` +#else .macro KERNEL1x1_SUBI1 +#endif lxsspx vs0, o0, AO @@ -5840,9 +7100,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUB1', ` +#else .macro KERNEL1x1_SUB1 +#endif lxsspx vs0, o0, AO @@ -5860,9 +7128,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x1', ` +#else .macro SAVE1x1 +#endif mr T1, CO @@ -5884,13 +7160,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 4 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`COPYB_4x8', ` +#else .macro COPYB_4x8 +#endif lxvw4x vs5, o0, BO @@ -5993,10 +7277,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvw4x vs54, o48, BBO addi BBO, BBO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`COPYB_1x8', ` +#else .macro COPYB_1x8 +#endif lxvw4x vs5, o0, BO @@ -6026,5 +7318,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs14, o48, BBO addi BBO, BBO, 64 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/sgemm_tcopy_macros_16_power8.S b/kernel/power/sgemm_tcopy_macros_16_power8.S index 53f9c8b82..ed592a604 100644 --- a/kernel/power/sgemm_tcopy_macros_16_power8.S +++ b/kernel/power/sgemm_tcopy_macros_16_power8.S @@ -38,7 +38,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=4 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x16', ` +#else .macro COPY_4x16 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -88,13 +92,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs46, o32, T1 stxvw4x vs47, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x8', ` +#else .macro COPY_4x8 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -124,13 +136,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvw4x vs38, o32, T1 stxvw4x vs39, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x4', ` +#else .macro COPY_4x4 +#endif lxvw4x vs32, o0, A0 @@ -150,13 +170,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x2', ` +#else .macro COPY_4x2 +#endif lxsspx vs32, o0, A0 lxsspx vs33, o4, A0 @@ -190,13 +218,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs38, o0, T1 stxsspx vs39, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x1', ` +#else .macro COPY_4x1 +#endif lxsspx vs32, o0, A0 @@ -218,13 +254,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs35, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x16', ` +#else .macro COPY_2x16 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -250,13 +294,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvw4x vs38, o32, T1 stxvw4x vs39, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x8', ` +#else .macro COPY_2x8 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -272,13 +324,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs34, o32, T1 stxvw4x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x4', ` +#else .macro COPY_2x4 +#endif lxvw4x vs32, o0, A0 @@ -290,13 +350,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs33, o16, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x2', ` +#else .macro COPY_2x2 +#endif lxsspx vs32, o0, A0 lxsspx vs33, o4, A0 @@ -314,13 +382,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs34, o0, T1 stxsspx vs35, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x1', ` +#else .macro COPY_2x1 +#endif lxsspx vs32, o0, A0 @@ -332,13 +408,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxsspx vs33, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x16', ` +#else .macro COPY_1x16 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -352,13 +436,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs34, o32, T1 stxvw4x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x8', ` +#else .macro COPY_1x8 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -368,13 +460,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs32, o0, T1 stxvw4x vs33, o16, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x4', ` +#else .macro COPY_1x4 +#endif lxvw4x vs32, o0, A0 @@ -382,13 +482,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs32, o0, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x2', ` +#else .macro COPY_1x2 +#endif lxsspx vs32, o0, A0 lxsspx vs33, o4, A0 @@ -398,13 +506,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxsspx vs32, o0, T1 stxsspx vs33, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x1', ` +#else .macro COPY_1x1 +#endif lxsspx vs32, o0, A0 @@ -412,5 +528,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs32, o0, T1 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/sgemm_tcopy_macros_8_power8.S b/kernel/power/sgemm_tcopy_macros_8_power8.S index 1b71d5bb3..f80f095dc 100644 --- a/kernel/power/sgemm_tcopy_macros_8_power8.S +++ b/kernel/power/sgemm_tcopy_macros_8_power8.S @@ -38,7 +38,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=4 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x8', ` +#else .macro COPY_4x8 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -68,13 +72,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs38, o32, T1 stxvw4x vs39, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x4', ` +#else .macro COPY_4x4 +#endif lxvw4x vs32, o0, A0 @@ -94,13 +106,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvw4x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x2', ` +#else .macro COPY_4x2 +#endif lxsspx vs32, o0, A0 lxsspx vs33, o4, A0 @@ -134,13 +154,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs38, o0, T1 stxsspx vs39, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x1', ` +#else .macro COPY_4x1 +#endif lxsspx vs32, o0, A0 @@ -162,13 +190,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs35, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x8', ` +#else .macro COPY_2x8 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -184,13 +220,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs34, o32, T1 stxvw4x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x4', ` +#else .macro COPY_2x4 +#endif lxvw4x vs32, o0, A0 @@ -202,13 +246,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvw4x vs33, o16, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x2', ` +#else .macro COPY_2x2 +#endif lxsspx vs32, o0, A0 lxsspx vs33, o4, A0 @@ -226,13 +278,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs34, o0, T1 stxsspx vs35, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x1', ` +#else .macro COPY_2x1 +#endif lxsspx vs32, o0, A0 @@ -244,13 +304,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs33, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x8', ` +#else .macro COPY_1x8 +#endif lxvw4x vs32, o0, A0 lxvw4x vs33, o16, A0 @@ -260,13 +328,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvw4x vs32, o0, T1 stxvw4x vs33, o16, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x4', ` +#else .macro COPY_1x4 +#endif lxvw4x vs32, o0, A0 @@ -274,13 +350,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvw4x vs32, o0, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x2', ` +#else .macro COPY_1x2 +#endif lxsspx vs32, o0, A0 lxsspx vs33, o4, A0 @@ -290,13 +374,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs32, o0, T1 stxsspx vs33, o4, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x1', ` +#else .macro COPY_1x1 +#endif lxsspx vs32, o0, A0 @@ -304,5 +396,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxsspx vs32, o0, T1 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/srot_microk_power8.c b/kernel/power/srot_microk_power8.c index 6eecb60a1..329a8cd06 100644 --- a/kernel/power/srot_microk_power8.c +++ b/kernel/power/srot_microk_power8.c @@ -71,10 +71,10 @@ static void srot_kernel_16 (long n, float *x, float *y, float c, float s) "addi %4, %4, 64 \n\t" "addic. %2, %2, -16 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvmulsp 40, 32, 36 \n\t" // c * x "xvmulsp 41, 33, 36 \n\t" @@ -138,9 +138,9 @@ static void srot_kernel_16 (long n, float *x, float *y, float c, float s) "addi %4, %4, 128 \n\t" "addic. 
%2, %2, -16 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvmulsp 40, 32, 36 \n\t" // c * x "xvmulsp 41, 33, 36 \n\t" diff --git a/kernel/power/sscal_microk_power8.c b/kernel/power/sscal_microk_power8.c index 058ff3399..88fba3166 100644 --- a/kernel/power/sscal_microk_power8.c +++ b/kernel/power/sscal_microk_power8.c @@ -56,10 +56,10 @@ static void sscal_kernel_16 (long n, float *x, float alpha) "addi %2, %2, 128 \n\t" "addic. %1, %1, -32 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvmulsp 40, 32, %x3 \n\t" "xvmulsp 41, 33, %x3 \n\t" @@ -92,9 +92,9 @@ static void sscal_kernel_16 (long n, float *x, float alpha) "addi %2, %2, 256 \n\t" "addic. %1, %1, -32 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvmulsp 40, 32, %x3 \n\t" "xvmulsp 41, 33, %x3 \n\t" @@ -147,8 +147,8 @@ static void sscal_kernel_16_zero (long n, float *x) ( "xxlxor %x3, %x3, %x3 \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "stxvd2x %x3, 0, %2 \n\t" "stxvd2x %x3, %4, %2 \n\t" @@ -162,7 +162,7 @@ static void sscal_kernel_16_zero (long n, float *x) "addi %2, %2, 128 \n\t" "addic. %1, %1, -32 \n\t" - "bgt 1b \n" + "bgt one%= \n" "#n=%1 x=%0=%2 t0=%x3 o16=%4 o32=%5 o48=%6 o64=%7 o80=%8 o96=%9 o112=%10" : diff --git a/kernel/power/sswap_microk_power8.c b/kernel/power/sswap_microk_power8.c index cfefdd6ef..a407018a8 100644 --- a/kernel/power/sswap_microk_power8.c +++ b/kernel/power/sswap_microk_power8.c @@ -39,8 +39,8 @@ static void sswap_kernel_32 (long n, float *x, float *y) { __asm__ ( - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "lxvd2x 32, 0, %4 \n\t" "lxvd2x 33, %5, %4 \n\t" @@ -83,7 +83,7 @@ static void sswap_kernel_32 (long n, float *x, float *y) "addi %4, %4, 128 \n\t" "addic. 
%2, %2, -32 \n\t" - "bgt 1b \n" + "bgt one%= \n" "#n=%2 x=%0=%3 y=%1=%4 o16=%5 o32=%6 o48=%7 o64=%8 o80=%9 o96=%10 o112=%11" : diff --git a/kernel/power/strmm_macros_16x8_power8.S b/kernel/power/strmm_macros_16x8_power8.S index 27bc1e89c..6c016d6fa 100644 --- a/kernel/power/strmm_macros_16x8_power8.S +++ b/kernel/power/strmm_macros_16x8_power8.S @@ -38,7 +38,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=8 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD8x16_1', ` +#else .macro LOAD8x16_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -63,9 +67,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_I1', ` +#else .macro KERNEL8x16_I1 +#endif lxvw4x vs4, o0, AO @@ -133,9 +145,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs63, vs3, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_1', ` +#else .macro KERNEL8x16_1 +#endif lxvw4x vs4, o0, AO @@ -203,9 +223,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs3, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_2', ` +#else .macro KERNEL8x16_2 +#endif lxvw4x vs0, o0, AO @@ -273,9 +301,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs7, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_E2', ` +#else .macro KERNEL8x16_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -319,9 +355,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs63, vs7, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_SUBI1', ` +#else .macro KERNEL8x16_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -389,9 +433,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs63, vs3, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x16_SUB1', ` +#else .macro KERNEL8x16_SUB1 +#endif lxvw4x vs0, o0, AO @@ -459,9 +511,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs63, vs3, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE8x16', ` +#else .macro SAVE8x16 +#endif mr T1, CO @@ -698,14 +758,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=8 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD8x8_1', ` +#else .macro LOAD8x8_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -728,9 +796,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_I1', ` +#else .macro KERNEL8x8_I1 +#endif lxvw4x vs4, o0, AO @@ -780,9 +856,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_1', ` +#else .macro KERNEL8x8_1 +#endif lxvw4x vs4, o0, AO @@ -832,9 +916,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_2', ` +#else .macro KERNEL8x8_2 +#endif lxvw4x vs0, o0, AO @@ -884,9 +976,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs5, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_E2', ` +#else .macro KERNEL8x8_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -914,9 +1014,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs5, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_SUBI1', ` +#else .macro KERNEL8x8_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -966,9 +1074,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x8_SUB1', ` +#else .macro KERNEL8x8_SUB1 +#endif lxvw4x vs0, o0, AO @@ -1018,9 +1134,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE8x8', ` +#else .macro SAVE8x8 +#endif mr T1, CO @@ -1193,14 +1317,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=8 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD8x4_1', ` +#else .macro LOAD8x4_1 +#endif lxvw4x vs0, o0, AO @@ -1222,9 +1354,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_I1', ` +#else .macro KERNEL8x4_I1 +#endif lxvw4x vs4, o0, AO @@ -1265,9 +1405,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_1', ` +#else .macro KERNEL8x4_1 +#endif lxvw4x vs4, o0, AO @@ -1308,9 +1456,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_2', ` +#else .macro KERNEL8x4_2 +#endif lxvw4x vs0, o0, AO @@ -1351,9 +1507,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs4, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_E2', ` +#else .macro KERNEL8x4_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -1373,9 +1537,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs4, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_SUBI1', ` +#else .macro KERNEL8x4_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -1416,9 +1588,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x4_SUB1', ` +#else .macro KERNEL8x4_SUB1 +#endif lxvw4x vs0, o0, AO @@ -1459,9 +1639,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE8x4', ` +#else .macro SAVE8x4 +#endif mr T1, CO @@ -1602,14 +1790,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=8 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD8x2_1', ` +#else .macro LOAD8x2_1 +#endif lxsspx vs0, o0, AO lxsspx vs1, o4, AO @@ -1632,9 +1828,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_I1', ` +#else .macro KERNEL8x2_I1 +#endif lxsspx vs4, o0, AO @@ -1684,9 +1888,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_1', ` +#else .macro KERNEL8x2_1 +#endif lxsspx vs4, o0, AO @@ -1736,9 +1948,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_2', ` +#else .macro KERNEL8x2_2 +#endif lxsspx vs0, o0, AO @@ -1788,9 +2008,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs5, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_E2', ` +#else .macro KERNEL8x2_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -1818,9 +2046,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs5, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_SUBI1', ` +#else .macro KERNEL8x2_SUBI1 +#endif lxsspx vs0, o0, AO @@ -1870,9 +2106,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x2_SUB1', ` +#else .macro KERNEL8x2_SUB1 +#endif lxsspx vs0, o0, AO @@ -1922,9 +2166,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs47, vs1, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE8x2', ` +#else .macro SAVE8x2 +#endif mr T1, CO @@ -2097,14 +2349,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=8 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD8x1_1', ` +#else .macro LOAD8x1_1 +#endif lxsspx vs0, o0, AO @@ -2126,9 +2386,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_I1', ` +#else .macro KERNEL8x1_I1 +#endif lxsspx vs4, o0, AO @@ -2169,9 +2437,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_1', ` +#else .macro KERNEL8x1_1 +#endif lxsspx vs4, o0, AO @@ -2212,9 +2488,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_2', ` +#else .macro KERNEL8x1_2 +#endif lxsspx vs0, o0, AO @@ -2255,9 +2539,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmaddadp vs39, vs4, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_E2', ` +#else .macro KERNEL8x1_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -2277,9 +2569,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs4, vs23 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_SUBI1', ` +#else .macro KERNEL8x1_SUBI1 +#endif lxsspx vs0, o0, AO @@ -2320,9 +2620,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL8x1_SUB1', ` +#else .macro KERNEL8x1_SUB1 +#endif lxsspx vs0, o0, AO @@ -2363,9 +2671,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs0, vs15 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE8x1', ` +#else .macro SAVE8x1 +#endif mr T1, CO @@ -2506,14 +2822,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 4 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x16_1', ` +#else .macro LOAD4x16_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -2531,9 +2855,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_I1', ` +#else .macro KERNEL4x16_I1 +#endif lxvw4x vs4, o0, AO @@ -2574,9 +2906,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs47, vs3, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_1', ` +#else .macro KERNEL4x16_1 +#endif lxvw4x vs4, o0, AO @@ -2617,9 +2957,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs3, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_2', ` +#else .macro KERNEL4x16_2 +#endif lxvw4x vs0, o0, AO @@ -2660,9 +3008,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs7, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_E2', ` +#else .macro KERNEL4x16_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -2686,9 +3042,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs7, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_SUBI1', ` +#else .macro KERNEL4x16_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -2729,9 +3093,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs47, vs3, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x16_SUB1', ` +#else .macro KERNEL4x16_SUB1 +#endif lxvw4x vs0, o0, AO @@ -2772,9 +3144,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs47, vs3, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x16', ` +#else .macro SAVE4x16 +#endif mr T1, CO @@ -2895,14 +3275,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x8_1', ` +#else .macro LOAD4x8_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -2918,9 +3306,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_I1', ` +#else .macro KERNEL4x8_I1 +#endif lxvw4x vs4, o0, AO @@ -2951,9 +3347,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_1', ` +#else .macro KERNEL4x8_1 +#endif lxvw4x vs4, o0, AO @@ -2984,9 +3388,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_2', ` +#else .macro KERNEL4x8_2 +#endif lxvw4x vs0, o0, AO @@ -3017,9 +3429,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs5, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_E2', ` +#else .macro KERNEL4x8_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -3035,9 +3455,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs5, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUBI1', ` +#else .macro KERNEL4x8_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -3068,9 +3496,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x8_SUB1', ` +#else .macro KERNEL4x8_SUB1 +#endif lxvw4x vs0, o0, AO @@ -3101,9 +3537,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x8', ` +#else .macro SAVE4x8 +#endif mr T1, CO @@ -3192,14 +3636,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x4_1', ` +#else .macro LOAD4x4_1 +#endif lxvw4x vs0, o0, AO @@ -3214,9 +3666,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_I1', ` +#else .macro KERNEL4x4_I1 +#endif lxvw4x vs4, o0, AO @@ -3242,9 +3702,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_1', ` +#else .macro KERNEL4x4_1 +#endif lxvw4x vs4, o0, AO @@ -3270,9 +3738,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_2', ` +#else .macro KERNEL4x4_2 +#endif lxvw4x vs0, o0, AO @@ -3298,9 +3774,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs35, vs4, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_E2', ` +#else .macro KERNEL4x4_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -3312,9 +3796,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs4, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUBI1', ` +#else .macro KERNEL4x4_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -3340,9 +3832,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x4_SUB1', ` +#else .macro KERNEL4x4_SUB1 +#endif lxvw4x vs0, o0, AO @@ -3368,9 +3868,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x4', ` +#else .macro SAVE4x4 +#endif mr T1, CO @@ -3443,14 +3951,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x2_1', ` +#else .macro LOAD4x2_1 +#endif lxsspx vs0, o0, AO lxsspx vs1, o4, AO @@ -3466,9 +3982,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_I1', ` +#else .macro KERNEL4x2_I1 +#endif lxsspx vs4, o0, AO @@ -3499,9 +4023,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_1', ` +#else .macro KERNEL4x2_1 +#endif lxsspx vs4, o0, AO @@ -3532,9 +4064,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_2', ` +#else .macro KERNEL4x2_2 +#endif lxsspx vs0, o0, AO @@ -3565,9 +4105,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs5, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_E2', ` +#else .macro KERNEL4x2_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -3583,9 +4131,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs5, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUBI1', ` +#else .macro KERNEL4x2_SUBI1 +#endif lxsspx vs0, o0, AO @@ -3616,9 +4172,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x2_SUB1', ` +#else .macro KERNEL4x2_SUB1 +#endif lxsspx vs0, o0, AO @@ -3649,9 +4213,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs39, vs1, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x2', ` +#else .macro SAVE4x2 +#endif mr T1, CO @@ -3740,14 +4312,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD4x1_1', ` +#else .macro LOAD4x1_1 +#endif lxsspx vs0, o0, AO @@ -3762,9 +4342,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_I1', ` +#else .macro KERNEL4x1_I1 +#endif lxsspx vs4, o0, AO @@ -3790,9 +4378,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_1', ` +#else .macro KERNEL4x1_1 +#endif lxsspx vs4, o0, AO @@ -3818,9 +4414,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_2', ` +#else .macro KERNEL4x1_2 +#endif lxsspx vs0, o0, AO @@ -3846,9 +4450,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs4, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_E2', ` +#else .macro KERNEL4x1_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -3860,9 +4472,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs4, vs19 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUBI1', ` +#else .macro KERNEL4x1_SUBI1 +#endif lxsspx vs0, o0, AO @@ -3888,9 +4508,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL4x1_SUB1', ` +#else .macro KERNEL4x1_SUB1 +#endif lxsspx vs0, o0, AO @@ -3916,9 +4544,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs0, vs11 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE4x1', ` +#else .macro SAVE4x1 +#endif mr T1, CO @@ -3991,14 +4627,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 4 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x16_1', ` +#else .macro LOAD2x16_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -4014,9 +4658,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_I1', ` +#else .macro KERNEL2x16_I1 +#endif lxvw4x vs4, o0, AO @@ -4045,9 +4697,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs3, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_1', ` +#else .macro KERNEL2x16_1 +#endif lxvw4x vs4, o0, AO @@ -4076,9 +4736,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs3, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_2', ` +#else .macro KERNEL2x16_2 +#endif lxvw4x vs0, o0, AO @@ -4107,9 +4775,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs39, vs7, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_E2', ` +#else .macro KERNEL2x16_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -4123,9 +4799,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs7, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_SUBI1', ` +#else .macro KERNEL2x16_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -4154,9 +4838,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs39, vs3, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x16_SUB1', ` +#else .macro KERNEL2x16_SUB1 +#endif lxvw4x vs0, o0, AO @@ -4185,9 +4877,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs39, vs3, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x16', ` +#else .macro SAVE2x16 +#endif mr T1, CO @@ -4250,14 +4950,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x8_1', ` +#else .macro LOAD2x8_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -4271,9 +4979,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_I1', ` +#else .macro KERNEL2x8_I1 +#endif lxvw4x vs4, o0, AO @@ -4296,9 +5012,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_1', ` +#else .macro KERNEL2x8_1 +#endif lxvw4x vs4, o0, AO @@ -4321,9 +5045,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_2', ` +#else .macro KERNEL2x8_2 +#endif lxvw4x vs0, o0, AO @@ -4346,9 +5078,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs5, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_E2', ` +#else .macro KERNEL2x8_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -4358,9 +5098,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs5, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUBI1', ` +#else .macro KERNEL2x8_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -4383,9 +5131,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUB1', ` +#else .macro KERNEL2x8_SUB1 +#endif lxvw4x vs0, o0, AO @@ -4408,9 +5164,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x8', ` +#else .macro SAVE2x8 +#endif mr T1, CO @@ -4457,14 +5221,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x4_1', ` +#else .macro LOAD2x4_1 +#endif lxvw4x vs0, o0, AO @@ -4477,9 +5249,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_I1', ` +#else .macro KERNEL2x4_I1 +#endif lxvw4x vs4, o0, AO @@ -4499,9 +5279,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_1', ` +#else .macro KERNEL2x4_1 +#endif lxvw4x vs4, o0, AO @@ -4521,9 +5309,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_2', ` +#else .macro KERNEL2x4_2 +#endif lxvw4x vs0, o0, AO @@ -4543,9 +5339,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs4, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_E2', ` +#else .macro KERNEL2x4_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -4553,9 +5357,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs4, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUBI1', ` +#else .macro KERNEL2x4_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -4575,9 +5387,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUB1', ` +#else .macro KERNEL2x4_SUB1 +#endif lxvw4x vs0, o0, AO @@ -4597,9 +5417,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x4', ` +#else .macro SAVE2x4 +#endif mr T1, CO @@ -4638,14 +5466,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x2_1', ` +#else .macro LOAD2x2_1 +#endif lxsspx vs0, o0, AO lxsspx vs1, o4, AO @@ -4659,9 +5495,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_I1', ` +#else .macro KERNEL2x2_I1 +#endif lxsspx vs4, o0, AO @@ -4684,9 +5528,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_1', ` +#else .macro KERNEL2x2_1 +#endif lxsspx vs4, o0, AO @@ -4709,9 +5561,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_2', ` +#else .macro KERNEL2x2_2 +#endif lxsspx vs0, o0, AO @@ -4734,9 +5594,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmaddadp vs35, vs5, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_E2', ` +#else .macro KERNEL2x2_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -4746,9 +5614,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs5, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUBI1', ` +#else .macro KERNEL2x2_SUBI1 +#endif lxsspx vs0, o0, AO @@ -4771,9 +5647,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUB1', ` +#else .macro KERNEL2x2_SUB1 +#endif lxsspx vs0, o0, AO @@ -4796,9 +5680,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs35, vs1, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x2', ` +#else .macro SAVE2x2 +#endif mr T1, CO @@ -4845,14 +5737,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x1_1', ` +#else .macro LOAD2x1_1 +#endif lxsspx vs0, o0, AO @@ -4865,9 +5765,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_I1', ` +#else .macro KERNEL2x1_I1 +#endif lxsspx vs4, o0, AO @@ -4887,9 +5795,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_1', ` +#else .macro KERNEL2x1_1 +#endif lxsspx vs4, o0, AO @@ -4909,9 +5825,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_2', ` +#else .macro KERNEL2x1_2 +#endif lxsspx vs0, o0, AO @@ -4931,9 +5855,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs4, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_E2', ` +#else .macro KERNEL2x1_E2 +#endif xsmaddadp vs32, vs4, vs16 @@ -4941,9 +5873,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs4, vs17 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUBI1', ` +#else .macro KERNEL2x1_SUBI1 +#endif lxsspx vs0, o0, AO @@ -4963,9 +5903,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUB1', ` +#else .macro KERNEL2x1_SUB1 +#endif lxsspx vs0, o0, AO @@ -4985,9 +5933,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs0, vs9 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x1', ` +#else .macro SAVE2x1 +#endif mr T1, CO @@ -5026,14 +5982,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 4 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=16 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x16_1', ` +#else .macro LOAD1x16_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -5048,9 +6012,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 4 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_I1', ` +#else .macro KERNEL1x16_I1 +#endif lxvw4x vs4, o0, AO @@ -5073,9 +6045,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs35, vs3, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_1', ` +#else .macro KERNEL1x16_1 +#endif lxvw4x vs4, o0, AO @@ -5098,9 +6078,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs3, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_2', ` +#else .macro KERNEL1x16_2 +#endif lxvw4x vs0, o0, AO @@ -5123,9 +6111,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs7, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_E2', ` +#else .macro KERNEL1x16_E2 +#endif xvmaddasp vs32, vs4, vs16 @@ -5134,9 +6130,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs7, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_SUBI1', ` +#else .macro KERNEL1x16_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -5159,9 +6163,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmulsp vs35, vs3, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x16_SUB1', ` +#else .macro KERNEL1x16_SUB1 +#endif lxvw4x vs0, o0, AO @@ -5184,9 +6196,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs35, vs3, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x16', ` +#else .macro SAVE1x16 +#endif mr T1, CO @@ -5220,14 +6240,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x8_1', ` +#else .macro LOAD1x8_1 +#endif lxvw4x vs0, o0, AO lxvw4x vs1, o16, AO @@ -5240,9 +6268,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 4 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_I1', ` +#else .macro KERNEL1x8_I1 +#endif lxvw4x vs4, o0, AO @@ -5261,9 +6297,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_1', ` +#else .macro KERNEL1x8_1 +#endif lxvw4x vs4, o0, AO @@ -5282,9 +6326,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_2', ` +#else .macro KERNEL1x8_2 +#endif lxvw4x vs0, o0, AO @@ -5303,18 +6355,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs33, vs5, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_E2', ` +#else .macro KERNEL1x8_E2 +#endif xvmaddasp vs32, vs4, vs16 xvmaddasp vs33, vs5, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUBI1', ` +#else .macro KERNEL1x8_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -5333,9 +6401,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUB1', ` +#else .macro KERNEL1x8_SUB1 +#endif lxvw4x vs0, o0, AO @@ -5354,9 +6430,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x8', ` +#else .macro SAVE1x8 +#endif mr T1, CO @@ -5382,14 +6466,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x4_1', ` +#else .macro LOAD1x4_1 +#endif lxvw4x vs0, o0, AO @@ -5401,9 +6493,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 4 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_I1', ` +#else .macro KERNEL1x4_I1 +#endif lxvw4x vs4, o0, AO @@ -5420,9 +6520,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_1', ` +#else .macro KERNEL1x4_1 +#endif lxvw4x vs4, o0, AO @@ -5439,9 +6547,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddasp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_2', ` +#else .macro KERNEL1x4_2 +#endif lxvw4x vs0, o0, AO @@ -5458,17 +6574,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs32, vs4, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_E2', ` +#else .macro KERNEL1x4_E2 +#endif xvmaddasp vs32, vs4, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUBI1', ` +#else .macro KERNEL1x4_SUBI1 +#endif lxvw4x vs0, o0, AO @@ -5485,9 +6617,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmulsp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUB1', ` +#else .macro KERNEL1x4_SUB1 +#endif lxvw4x vs0, o0, AO @@ -5504,9 +6644,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddasp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x4', ` +#else .macro SAVE1x4 +#endif mr T1, CO @@ -5528,14 +6676,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x2_1', ` +#else .macro LOAD1x2_1 +#endif lxsspx vs0, o0, AO lxsspx vs1, o4, AO @@ -5548,9 +6704,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi BO, BO, 4 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_I1', ` +#else .macro KERNEL1x2_I1 +#endif lxsspx vs4, o0, AO @@ -5569,9 +6733,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xsmuldp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_1', ` +#else .macro KERNEL1x2_1 +#endif lxsspx vs4, o0, AO @@ -5590,9 +6762,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_2', ` +#else .macro KERNEL1x2_2 +#endif lxsspx vs0, o0, AO @@ -5611,18 +6791,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs5, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_E2', ` +#else .macro KERNEL1x2_E2 +#endif xsmaddadp vs32, vs4, vs16 xsmaddadp vs33, vs5, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUBI1', ` +#else .macro KERNEL1x2_SUBI1 +#endif lxsspx vs0, o0, AO @@ -5641,9 +6837,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUB1', ` +#else .macro KERNEL1x2_SUB1 +#endif lxsspx vs0, o0, AO @@ -5662,9 +6866,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs33, vs1, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x2', ` +#else .macro SAVE1x2 +#endif mr T1, CO @@ -5690,14 +6902,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi CO, CO, 8 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x1_1', ` +#else .macro LOAD1x1_1 +#endif lxsspx vs0, o0, AO @@ -5709,9 +6929,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi BO, BO, 4 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_I1', ` +#else .macro KERNEL1x1_I1 +#endif lxsspx vs4, o0, AO @@ -5728,9 +6956,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_1', ` +#else .macro KERNEL1x1_1 +#endif lxsspx vs4, o0, AO @@ -5747,9 +6983,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_2', ` +#else .macro KERNEL1x1_2 +#endif lxsspx vs0, o0, AO @@ -5766,17 +7010,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs32, vs4, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_E2', ` +#else .macro KERNEL1x1_E2 +#endif xsmaddadp vs32, vs4, vs16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUBI1', ` +#else .macro KERNEL1x1_SUBI1 +#endif lxsspx vs0, o0, AO @@ -5793,9 +7053,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmuldp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUB1', ` +#else .macro KERNEL1x1_SUB1 +#endif lxsspx vs0, o0, AO @@ -5812,9 +7080,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xsmaddadp vs32, vs0, vs8 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x1', ` +#else .macro SAVE1x1 +#endif mr T1, CO @@ -5836,5 +7112,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi CO, CO, 4 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/zasum_microk_power8.c b/kernel/power/zasum_microk_power8.c index 82366902d..3f0af4232 100644 --- a/kernel/power/zasum_microk_power8.c +++ b/kernel/power/zasum_microk_power8.c @@ -68,10 +68,10 @@ static double zasum_kernel_8 (long n, double *x) "addi %2, %2, 128 \n\t" "addic. %1, %1, -8 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvabsdp 48, 40 \n\t" "xvabsdp 49, 41 \n\t" @@ -108,9 +108,9 @@ static double zasum_kernel_8 (long n, double *x) "xvadddp 38, 38, %x5 \n\t" "xvadddp 39, 39, %x6 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvabsdp 48, 40 \n\t" "xvabsdp 49, 41 \n\t" @@ -140,7 +140,7 @@ static double zasum_kernel_8 (long n, double *x) "xvadddp 32, 32, 36 \n\t" - "xxswapd 33, 32 \n\t" + XXSWAPD_S(33,32) "xsadddp %x0, 32, 33 \n" "#n=%1 x=%3=%2 sum=%0 o16=%8 o32=%9 o48=%10 o64=%11 o80=%12 o96=%13 o112=%14\n" diff --git a/kernel/power/zaxpy_microk_power8.c b/kernel/power/zaxpy_microk_power8.c index 124614f62..959050e5f 100644 --- a/kernel/power/zaxpy_microk_power8.c +++ b/kernel/power/zaxpy_microk_power8.c @@ -61,8 +61,8 @@ static void zaxpy_kernel_4 (long n, double *x, double *y, __asm__ ( - "xxspltd 32, %x19, 0 \n\t" // alpha_r - "xxspltd 33, %x20, 0 \n\t" // alpha_i + XXSPLTD_S(32,%x19,0) // alpha_r + XXSPLTD_S(33,%x20,0) // alpha_i "lxvd2x 36, 0, %21 \n\t" // mvec @@ -87,10 +87,10 @@ static void zaxpy_kernel_4 (long n, double *x, double *y, "lxvd2x 50, %23, %3 \n\t" // y2 "lxvd2x 51, %24, %3 \n\t" // y3 - "xxswapd %x8, 40 \n\t" // exchange real and imag part - "xxswapd %x9, 41 \n\t" // exchange real and imag part - "xxswapd %x10, 42 \n\t" // exchange real and imag part - "xxswapd %x11, 43 \n\t" // exchange real and imag part + XXSWAPD_S(%x8,40) // exchange real and imag part + XXSWAPD_S(%x9,41) // exchange real and imag part + XXSWAPD_S(%x10,42) // exchange real and imag part + XXSWAPD_S(%x11,43) 
// exchange real and imag part "addi %2, %2, 64 \n\t" "addi %3, %3, 64 \n\t" @@ -105,19 +105,19 @@ static void zaxpy_kernel_4 (long n, double *x, double *y, "lxvd2x %x6, %23, %3 \n\t" // y6 "lxvd2x %x7, %24, %3 \n\t" // y7 - "xxswapd %x12, 44 \n\t" // exchange real and imag part - "xxswapd %x13, 45 \n\t" // exchange real and imag part - "xxswapd %x14, 46 \n\t" // exchange real and imag part - "xxswapd %x15, 47 \n\t" // exchange real and imag part + XXSWAPD_S(%x12,44) // exchange real and imag part + XXSWAPD_S(%x13,45) // exchange real and imag part + XXSWAPD_S(%x14,46) // exchange real and imag part + XXSWAPD_S(%x15,47) // exchange real and imag part "addi %2, %2, 64 \n\t" "addi %3, %3, 64 \n\t" "addic. %1, %1, -8 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvmaddadp 48, 40, 32 \n\t" // alpha_r * x0_r , alpha_r * x0_i "xvmaddadp 49, 41, 32 \n\t" @@ -163,31 +163,31 @@ static void zaxpy_kernel_4 (long n, double *x, double *y, "addi %16, %16, 64 \n\t" - "xxswapd %x8, 40 \n\t" // exchange real and imag part - "xxswapd %x9, 41 \n\t" // exchange real and imag part + XXSWAPD_S(%x8,40) // exchange real and imag part + XXSWAPD_S(%x9,41) // exchange real and imag part "lxvd2x 48, 0, %3 \n\t" // y0 "lxvd2x 49, %22, %3 \n\t" // y1 - "xxswapd %x10, 42 \n\t" // exchange real and imag part - "xxswapd %x11, 43 \n\t" // exchange real and imag part + XXSWAPD_S(%x10,42) // exchange real and imag part + XXSWAPD_S(%x11,43) // exchange real and imag part "lxvd2x 50, %23, %3 \n\t" // y2 "lxvd2x 51, %24, %3 \n\t" // y3 - "xxswapd %x12, 44 \n\t" // exchange real and imag part + XXSWAPD_S(%x12,44) // exchange real and imag part "addi %3, %3, 64 \n\t" - "xxswapd %x13, 45 \n\t" // exchange real and imag part + XXSWAPD_S(%x13,45) // exchange real and imag part "lxvd2x %x4, 0, %3 \n\t" // y4 "lxvd2x %x5, %22, %3 \n\t" // y5 - "xxswapd %x14, 46 \n\t" // exchange real and imag part - "xxswapd %x15, 47 \n\t" // exchange real and imag part 
+ XXSWAPD_S(%x14,46) // exchange real and imag part + XXSWAPD_S(%x15,47) // exchange real and imag part "lxvd2x %x6, %23, %3 \n\t" // y6 "lxvd2x %x7, %24, %3 \n\t" // y7 "addi %3, %3, 64 \n\t" "addic. %1, %1, -8 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvmaddadp 48, 40, 32 \n\t" // alpha_r * x0_r , alpha_r * x0_i "xvmaddadp 49, 41, 32 \n\t" diff --git a/kernel/power/zcopy_microk_power8.c b/kernel/power/zcopy_microk_power8.c index 5ca34b633..e29547047 100644 --- a/kernel/power/zcopy_microk_power8.c +++ b/kernel/power/zcopy_microk_power8.c @@ -62,10 +62,10 @@ static void zcopy_kernel_16 (long n, FLOAT *x, FLOAT *y) "addi %2, %2, 128 \n\t" "addic. %1, %1, -16 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "stxvd2x 32, 0, %3 \n\t" "stxvd2x 33, %5, %3 \n\t" @@ -108,9 +108,9 @@ static void zcopy_kernel_16 (long n, FLOAT *x, FLOAT *y) "addi %2, %2, 128 \n\t" "addic. %1, %1, -16 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "stxvd2x 32, 0, %3 \n\t" "stxvd2x 33, %5, %3 \n\t" diff --git a/kernel/power/zdot_microk_power8.c b/kernel/power/zdot_microk_power8.c index 71078b66c..dcde82433 100644 --- a/kernel/power/zdot_microk_power8.c +++ b/kernel/power/zdot_microk_power8.c @@ -60,10 +60,10 @@ static void zdot_kernel_8 (long n, double *x, double *y, double *dot) "lxvd2x 43, %9, %2 \n\t" // x3_r, x3_i "lxvd2x 51, %9, %3 \n\t" // y3_r, y3_i - "xxswapd 0, 48 \n\t" // y0_i, y0_r - "xxswapd 1, 49 \n\t" // y1_i, y1_r - "xxswapd 2, 50 \n\t" // y2_i, y2_r - "xxswapd 3, 51 \n\t" // y3_i, y3_r + XXSWAPD_S(0,48) // y0_i, y0_r + XXSWAPD_S(1,49) // y1_i, y1_r + XXSWAPD_S(2,50) // y2_i, y2_r + XXSWAPD_S(3,51) // y3_i, y3_r "addi %2, %2, 64 \n\t" "addi %3, %3, 64 \n\t" @@ -77,19 +77,19 @@ static void zdot_kernel_8 (long n, double *x, double *y, double *dot) "lxvd2x 47, %9, %2 \n\t" // x3_r, x3_i "lxvd2x 7, %9, %3 \n\t" // y3_r, y3_i - "xxswapd 8, 4 \n\t" // y0_i, y0_r - "xxswapd 9, 5 \n\t" // 
y1_i, y1_r - "xxswapd 10, 6 \n\t" // y2_i, y2_r - "xxswapd 11, 7 \n\t" // y3_i, y3_r + XXSWAPD_S(8,4) // y0_i, y0_r + XXSWAPD_S(9,5) // y1_i, y1_r + XXSWAPD_S(10,6) // y2_i, y2_r + XXSWAPD_S(11,7) // y3_i, y3_r "addi %2, %2, 64 \n\t" "addi %3, %3, 64 \n\t" "addic. %1, %1, -8 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvmaddadp 32, 40, 48 \n\t" // x0_r * y0_r , x0_i * y0_i "lxvd2x 48, 0, %3 \n\t" // y0_r, y0_i @@ -111,14 +111,14 @@ static void zdot_kernel_8 (long n, double *x, double *y, double *dot) "xvmaddadp 39, 43, 3 \n\t" // x3_r * y3_i , x3_i * y3_r "lxvd2x 43, %9, %2 \n\t" // x3_r, x3_i - "xxswapd 0,48 \n\t" // y0_i, y0_r - "xxswapd 1,49 \n\t" // y1_i, y1_r + XXSWAPD_S(0,48) // y0_i, y0_r + XXSWAPD_S(1,49) // y1_i, y1_r "addi %2, %2, 64 \n\t" "addi %3, %3, 64 \n\t" - "xxswapd 2,50 \n\t" // y2_i, y2_r - "xxswapd 3,51 \n\t" // y3_i, y3_r + XXSWAPD_S(2,50) // y2_i, y2_r + XXSWAPD_S(3,51) // y3_i, y3_r "xvmaddadp 32, 44, 4 \n\t" // x0_r * y0_r , x0_i * y0_i "lxvd2x 4, 0, %3 \n\t" // y0_r, y0_i @@ -138,19 +138,19 @@ static void zdot_kernel_8 (long n, double *x, double *y, double *dot) "xvmaddadp 39, 47, 11 \n\t" // x3_r * y3_i , x3_i * y3_r "lxvd2x 47, %9, %2 \n\t" // x3_r, x3_i - "xxswapd 8,4 \n\t" // y0_i, y0_r - "xxswapd 9,5 \n\t" // y1_i, y1_r + XXSWAPD_S(8,4) // y0_i, y0_r + XXSWAPD_S(9,5) // y1_i, y1_r "addi %2, %2, 64 \n\t" "addi %3, %3, 64 \n\t" - "xxswapd 10,6 \n\t" // y2_i, y2_r - "xxswapd 11,7 \n\t" // y3_i, y3_r + XXSWAPD_S(10,6) // y2_i, y2_r + XXSWAPD_S(11,7) // y3_i, y3_r "addic. 
%1, %1, -8 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvmaddadp 32, 40, 48 \n\t" // x0_r * y0_r , x0_i * y0_i "xvmaddadp 34, 41, 49 \n\t" // x1_r * y1_r , x1_i * y1_i diff --git a/kernel/power/zgemm_macros_8x2_power8.S b/kernel/power/zgemm_macros_8x2_power8.S index c43a115b2..24a36470c 100644 --- a/kernel/power/zgemm_macros_8x2_power8.S +++ b/kernel/power/zgemm_macros_8x2_power8.S @@ -67,7 +67,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x8_1', ` +#else .macro LOAD2x8_1 +#endif lxvd2x vs16, o0, BO // load real part from B lxvd2x vs17, o16, BO // load imag part from B @@ -91,9 +95,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_I1', ` +#else .macro KERNEL2x8_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -151,9 +163,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs63, vs7, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_1', ` +#else .macro KERNEL2x8_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -211,9 +231,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs63, vs7, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_2', ` +#else .macro KERNEL2x8_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -271,9 +299,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs63, vs15, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_E2', ` +#else .macro KERNEL2x8_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -311,9 +347,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs63, vs15, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUBI1', ` +#else .macro KERNEL2x8_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -371,9 +415,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs63, vs7, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUB1', ` +#else .macro KERNEL2x8_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -431,9 +483,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs63, vs7, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x8', ` +#else .macro SAVE2x8 +#endif mr T1, CO @@ -455,13 +515,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -479,13 +539,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -503,13 +563,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs37, vs37 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs37,vs37) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs36 // realA*realB XSFADD_R2 vs0, vs0, vs37 // imagA*imagB - xxswapd vs36, vs36 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs37, vs37 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs36,vs36) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs37,vs37) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs36 // realA*imagB XSFADD_I2 vs1, vs1, vs37 // imagA*realB @@ -527,13 +587,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs39, vs39 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs39,vs39) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs38 // realA*realB XSFADD_R2 vs0, vs0, vs39 // imagA*imagB - xxswapd vs38, vs38 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs39, vs39 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs38,vs38) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs39,vs39) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs38 // realA*imagB XSFADD_I2 vs1, vs1, vs39 // imagA*realB @@ -551,13 +611,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs41, vs41 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs41,vs41) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs40 // realA*realB XSFADD_R2 vs0, vs0, vs41 // imagA*imagB - xxswapd vs40, vs40 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs41, vs41 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs40,vs40) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs41,vs41) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs40 // realA*imagB XSFADD_I2 vs1, vs1, vs41 // imagA*realB @@ -575,13 +635,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs43, vs43 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs43,vs43) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs42 // realA*realB XSFADD_R2 vs0, vs0, vs43 // imagA*imagB - xxswapd vs42, vs42 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs43, vs43 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs42,vs42) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs43,vs43) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs42 // realA*imagB XSFADD_I2 vs1, vs1, vs43 // imagA*realB @@ -599,13 +659,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs45, vs45 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs45,vs45) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs44 // realA*realB XSFADD_R2 vs0, vs0, vs45 // imagA*imagB - xxswapd vs44, vs44 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs45, vs45 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs44,vs44) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs45,vs45) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs44 // realA*imagB XSFADD_I2 vs1, vs1, vs45 // imagA*realB @@ -623,13 +683,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs47, vs47 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs47,vs47) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs46 // realA*realB XSFADD_R2 vs0, vs0, vs47 // imagA*imagB - xxswapd vs46, vs46 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs47, vs47 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs46,vs46) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs47,vs47) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs46 // realA*imagB XSFADD_I2 vs1, vs1, vs47 // imagA*realB @@ -685,13 +745,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs49, vs49 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs49,vs49) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs48 // realA*realB XSFADD_R2 vs0, vs0, vs49 // imagA*imagB - xxswapd vs48, vs48 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs49, vs49 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs48,vs48) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs49,vs49) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs48 // realA*imagB XSFADD_I2 vs1, vs1, vs49 // imagA*realB @@ -709,13 +769,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs51, vs51 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs51,vs51) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs50 // realA*realB XSFADD_R2 vs0, vs0, vs51 // imagA*imagB - xxswapd vs50, vs50 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs51, vs51 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs50,vs50) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs51,vs51) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs50 // realA*imagB XSFADD_I2 vs1, vs1, vs51 // imagA*realB @@ -733,13 +793,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs53, vs53 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs53,vs53) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs52 // realA*realB XSFADD_R2 vs0, vs0, vs53 // imagA*imagB - xxswapd vs52, vs52 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs53, vs53 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs52,vs52) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs53,vs53) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs52 // realA*imagB XSFADD_I2 vs1, vs1, vs53 // imagA*realB @@ -757,13 +817,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs55, vs55 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs55,vs55) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs54 // realA*realB XSFADD_R2 vs0, vs0, vs55 // imagA*imagB - xxswapd vs54, vs54 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs55, vs55 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs54,vs54) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs55,vs55) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs54 // realA*imagB XSFADD_I2 vs1, vs1, vs55 // imagA*realB @@ -781,13 +841,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs57, vs57 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs57,vs57) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs56 // realA*realB XSFADD_R2 vs0, vs0, vs57 // imagA*imagB - xxswapd vs56, vs56 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs57, vs57 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs56,vs56) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs57,vs57) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs56 // realA*imagB XSFADD_I2 vs1, vs1, vs57 // imagA*realB @@ -805,13 +865,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs59, vs59 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs59,vs59) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs58 // realA*realB XSFADD_R2 vs0, vs0, vs59 // imagA*imagB - xxswapd vs58, vs58 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs59, vs59 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs58,vs58) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs59,vs59) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs58 // realA*imagB XSFADD_I2 vs1, vs1, vs59 // imagA*realB @@ -829,13 +889,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs61, vs61 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs61,vs61) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs60 // realA*realB XSFADD_R2 vs0, vs0, vs61 // imagA*imagB - xxswapd vs60, vs60 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs61, vs61 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs60,vs60) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs61,vs61) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs60 // realA*imagB XSFADD_I2 vs1, vs1, vs61 // imagA*realB @@ -853,13 +913,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs63, vs63 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs63,vs63) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs62 // realA*realB XSFADD_R2 vs0, vs0, vs63 // imagA*imagB - xxswapd vs62, vs62 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs63, vs63 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs62,vs62) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs63,vs63) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs62 // realA*imagB XSFADD_I2 vs1, vs1, vs63 // imagA*realB @@ -900,14 +960,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
add T2, T2, LDC addi CO, CO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x4_1', ` +#else .macro LOAD2x4_1 +#endif lxvd2x vs16, o0, BO // load real part from B lxvd2x vs17, o16, BO // load imag part from B @@ -924,9 +992,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_I1', ` +#else .macro KERNEL2x4_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -961,9 +1037,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs47, vs3, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_1', ` +#else .macro KERNEL2x4_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -998,9 +1082,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs3, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_2', ` +#else .macro KERNEL2x4_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1035,9 +1127,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs11, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_E2', ` +#else .macro KERNEL2x4_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -1059,9 +1159,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs47, vs11, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUBI1', ` +#else .macro KERNEL2x4_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1096,9 +1204,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs47, vs3, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUB1', ` +#else .macro KERNEL2x4_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1133,9 +1249,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs3, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x4', ` +#else .macro SAVE2x4 +#endif mr T1, CO @@ -1152,13 +1276,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -1176,13 +1300,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -1200,13 +1324,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs37, vs37 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs37,vs37) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs36 // realA*realB XSFADD_R2 vs0, vs0, vs37 // imagA*imagB - xxswapd vs36, vs36 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs37, vs37 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs36,vs36) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs37,vs37) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs36 // realA*imagB XSFADD_I2 vs1, vs1, vs37 // imagA*realB @@ -1224,13 +1348,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs39, vs39 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs39,vs39) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs38 // realA*realB XSFADD_R2 vs0, vs0, vs39 // imagA*imagB - xxswapd vs38, vs38 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs39, vs39 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs38,vs38) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs39,vs39) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs38 // realA*imagB XSFADD_I2 vs1, vs1, vs39 // imagA*realB @@ -1273,13 +1397,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs41, vs41 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs41,vs41) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs40 // realA*realB XSFADD_R2 vs0, vs0, vs41 // imagA*imagB - xxswapd vs40, vs40 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs41, vs41 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs40,vs40) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs41,vs41) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs40 // realA*imagB XSFADD_I2 vs1, vs1, vs41 // imagA*realB @@ -1297,13 +1421,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs43, vs43 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs43,vs43) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs42 // realA*realB XSFADD_R2 vs0, vs0, vs43 // imagA*imagB - xxswapd vs42, vs42 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs43, vs43 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs42,vs42) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs43,vs43) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs42 // realA*imagB XSFADD_I2 vs1, vs1, vs43 // imagA*realB @@ -1321,13 +1445,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs45, vs45 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs45,vs45) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs44 // realA*realB XSFADD_R2 vs0, vs0, vs45 // imagA*imagB - xxswapd vs44, vs44 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs45, vs45 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs44,vs44) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs45,vs45) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs44 // realA*imagB XSFADD_I2 vs1, vs1, vs45 // imagA*realB @@ -1345,13 +1469,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs47, vs47 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs47,vs47) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs46 // realA*realB XSFADD_R2 vs0, vs0, vs47 // imagA*imagB - xxswapd vs46, vs46 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs47, vs47 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs46,vs46) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs47,vs47) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs46 // realA*imagB XSFADD_I2 vs1, vs1, vs47 // imagA*realB @@ -1383,14 +1507,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add T1, T1, LDC addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x2_1', ` +#else .macro LOAD2x2_1 +#endif lxvd2x vs16, o0, BO // load real part from B lxvd2x vs17, o16, BO // load imag part from B @@ -1405,9 +1537,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_I1', ` +#else .macro KERNEL2x2_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -1432,9 +1572,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs39, vs1, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_1', ` +#else .macro KERNEL2x2_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -1459,9 +1607,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs39, vs1, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_2', ` +#else .macro KERNEL2x2_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1486,9 +1642,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs9, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_E2', ` +#else .macro KERNEL2x2_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -1502,9 +1666,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs9, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUBI1', ` +#else .macro KERNEL2x2_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1529,9 +1701,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs39, vs1, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUB1', ` +#else .macro KERNEL2x2_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1556,9 +1736,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs1, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x2', ` +#else .macro SAVE2x2 +#endif mr T1, CO @@ -1573,13 +1761,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -1597,13 +1785,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -1640,13 +1828,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs37, vs37 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs37,vs37) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs36 // realA*realB XSFADD_R2 vs0, vs0, vs37 // imagA*imagB - xxswapd vs36, vs36 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs37, vs37 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs36,vs36) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs37,vs37) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs36 // realA*imagB XSFADD_I2 vs1, vs1, vs37 // imagA*realB @@ -1664,13 +1852,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs39, vs39 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs39,vs39) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs38 // realA*realB XSFADD_R2 vs0, vs0, vs39 // imagA*imagB - xxswapd vs38, vs38 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs39, vs39 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs38,vs38) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs39,vs39) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs38 // realA*imagB XSFADD_I2 vs1, vs1, vs39 // imagA*realB @@ -1698,14 +1886,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
add T1, T1, LDC addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x1_1', ` +#else .macro LOAD2x1_1 +#endif lxvd2x vs16, o0, BO // load real part from B lxvd2x vs17, o16, BO // load imag part from B @@ -1719,9 +1915,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_I1', ` +#else .macro KERNEL2x1_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A @@ -1741,9 +1945,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs35, vs0, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_1', ` +#else .macro KERNEL2x1_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A @@ -1763,9 +1975,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs0, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_2', ` +#else .macro KERNEL2x1_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -1785,9 +2005,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs8, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_E2', ` +#else .macro KERNEL2x1_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -1797,9 +2025,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs35, vs8, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUBI1', ` +#else .macro KERNEL2x1_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -1819,9 +2055,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs35, vs0, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUB1', ` +#else .macro KERNEL2x1_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -1841,9 +2085,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs0, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x1', ` +#else .macro SAVE2x1 +#endif mr T1, CO @@ -1857,13 +2109,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -1897,13 +2149,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -1929,14 +2181,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add T1, T1, LDC addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x8_1', ` +#else .macro LOAD1x8_1 +#endif lxvd2x vs16, o0, BO // load real part from B lxvd2x vs17, o16, BO // load imag part from B @@ -1958,9 +2218,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_I1', ` +#else .macro KERNEL1x8_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -1999,9 +2267,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs47, vs7, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_1', ` +#else .macro KERNEL1x8_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -2040,9 +2316,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs47, vs7, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_2', ` +#else .macro KERNEL1x8_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2081,9 +2365,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs15, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_E2', ` +#else .macro KERNEL1x8_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -2104,9 +2396,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs15, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUBI1', ` +#else .macro KERNEL1x8_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2145,9 +2445,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs47, vs7, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUB1', ` +#else .macro KERNEL1x8_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2186,9 +2494,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs7, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x8', ` +#else .macro SAVE1x8 +#endif mr T1, CO @@ -2210,13 +2526,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -2234,13 +2550,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -2258,13 +2574,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs37, vs37 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs37,vs37) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs36 // realA*realB XSFADD_R2 vs0, vs0, vs37 // imagA*imagB - xxswapd vs36, vs36 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs37, vs37 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs36,vs36) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs37,vs37) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs36 // realA*imagB XSFADD_I2 vs1, vs1, vs37 // imagA*realB @@ -2282,13 +2598,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs39, vs39 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs39,vs39) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs38 // realA*realB XSFADD_R2 vs0, vs0, vs39 // imagA*imagB - xxswapd vs38, vs38 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs39, vs39 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs38,vs38) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs39,vs39) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs38 // realA*imagB XSFADD_I2 vs1, vs1, vs39 // imagA*realB @@ -2306,13 +2622,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs41, vs41 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs41,vs41) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs40 // realA*realB XSFADD_R2 vs0, vs0, vs41 // imagA*imagB - xxswapd vs40, vs40 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs41, vs41 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs40,vs40) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs41,vs41) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs40 // realA*imagB XSFADD_I2 vs1, vs1, vs41 // imagA*realB @@ -2330,13 +2646,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs43, vs43 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs43,vs43) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs42 // realA*realB XSFADD_R2 vs0, vs0, vs43 // imagA*imagB - xxswapd vs42, vs42 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs43, vs43 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs42,vs42) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs43,vs43) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs42 // realA*imagB XSFADD_I2 vs1, vs1, vs43 // imagA*realB @@ -2354,13 +2670,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs45, vs45 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs45,vs45) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs44 // realA*realB XSFADD_R2 vs0, vs0, vs45 // imagA*imagB - xxswapd vs44, vs44 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs45, vs45 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs44,vs44) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs45,vs45) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs44 // realA*imagB XSFADD_I2 vs1, vs1, vs45 // imagA*realB @@ -2378,13 +2694,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs47, vs47 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs47,vs47) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs46 // realA*realB XSFADD_R2 vs0, vs0, vs47 // imagA*imagB - xxswapd vs46, vs46 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs47, vs47 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs46,vs46) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs47,vs47) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs46 // realA*imagB XSFADD_I2 vs1, vs1, vs47 // imagA*realB @@ -2425,14 +2741,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
add T2, T2, LDC addi CO, CO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x4_1', ` +#else .macro LOAD1x4_1 +#endif lxvd2x vs16, o0, BO // load real part from B lxvd2x vs17, o16, BO // load imag part from B @@ -2447,9 +2771,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_I1', ` +#else .macro KERNEL1x4_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -2473,9 +2805,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs39, vs3, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_1', ` +#else .macro KERNEL1x4_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -2499,9 +2839,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs3, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_2', ` +#else .macro KERNEL1x4_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2525,9 +2873,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs11, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_E2', ` +#else .macro KERNEL1x4_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -2540,9 +2896,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs39, vs11, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUBI1', ` +#else .macro KERNEL1x4_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2566,9 +2930,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs39, vs3, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUB1', ` +#else .macro KERNEL1x4_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2592,9 +2964,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs3, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x4', ` +#else .macro SAVE1x4 +#endif mr T1, CO @@ -2611,13 +2991,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -2635,13 +3015,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -2659,13 +3039,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs37, vs37 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs37,vs37) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs36 // realA*realB XSFADD_R2 vs0, vs0, vs37 // imagA*imagB - xxswapd vs36, vs36 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs37, vs37 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs36,vs36) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs37,vs37) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs36 // realA*imagB XSFADD_I2 vs1, vs1, vs37 // imagA*realB @@ -2683,13 +3063,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs39, vs39 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs39,vs39) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs38 // realA*realB XSFADD_R2 vs0, vs0, vs39 // imagA*imagB - xxswapd vs38, vs38 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs39, vs39 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs38,vs38) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs39,vs39) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs38 // realA*imagB XSFADD_I2 vs1, vs1, vs39 // imagA*realB @@ -2721,14 +3101,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add T1, T1, LDC addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x2_1', ` +#else .macro LOAD1x2_1 +#endif lxvd2x vs16, o0, BO // load real part from B lxvd2x vs17, o16, BO // load imag part from B @@ -2741,9 +3129,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_I1', ` +#else .macro KERNEL1x2_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -2761,9 +3157,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs35, vs1, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_1', ` +#else .macro KERNEL1x2_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -2781,9 +3185,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs35, vs1, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_2', ` +#else .macro KERNEL1x2_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2801,9 +3213,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs9, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_E2', ` +#else .macro KERNEL1x2_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -2812,9 +3232,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs9, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUBI1', ` +#else .macro KERNEL1x2_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2832,9 +3260,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs35, vs1, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUB1', ` +#else .macro KERNEL1x2_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2852,9 +3288,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs1, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x2', ` +#else .macro SAVE1x2 +#endif mr T1, CO @@ -2869,13 +3313,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -2893,13 +3337,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -2927,14 +3371,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
add T1, T1, LDC addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x1_1', ` +#else .macro LOAD1x1_1 +#endif lxvd2x vs16, o0, BO // load real part from B lxvd2x vs17, o16, BO // load imag part from B @@ -2946,9 +3398,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_I1', ` +#else .macro KERNEL1x1_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A @@ -2963,9 +3423,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs33, vs0, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_1', ` +#else .macro KERNEL1x1_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A @@ -2980,9 +3448,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs33, vs0, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_2', ` +#else .macro KERNEL1x1_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -2997,18 +3473,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs33, vs8, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_E2', ` +#else .macro KERNEL1x1_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real xvmaddadp vs33, vs8, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUBI1', ` +#else .macro KERNEL1x1_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -3023,9 +3515,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs33, vs0, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUB1', ` +#else .macro KERNEL1x1_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -3040,9 +3540,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs33, vs0, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x1', ` +#else .macro SAVE1x1 +#endif mr T1, CO @@ -3056,13 +3564,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -3088,11 +3596,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
add T1, T1, LDC addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`ZCOPYB_1x1', ` +#else .macro ZCOPYB_1x1 +#endif lxvdsx vs4, o0, BO // b0_r lxvdsx vs5, o8, BO // b0_i @@ -3101,10 +3617,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs5, o16, BBO addi BBO, BBO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`ZCOPYB_8x1', ` +#else .macro ZCOPYB_8x1 +#endif lxvd2x vs32, o0, BO lxvd2x vs33, o16, BO @@ -3118,23 +3642,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lxvd2x vs39, o48, BO addi BO, BO, 64 - xxspltd vs40, vs32, 0 - xxspltd vs41, vs32, 1 - xxspltd vs42, vs33, 0 - xxspltd vs43, vs33, 1 - xxspltd vs44, vs34, 0 - xxspltd vs45, vs34, 1 - xxspltd vs46, vs35, 0 - xxspltd vs47, vs35, 1 + XXSPLTD(vs40,vs32,0) + XXSPLTD(vs41,vs32,1) + XXSPLTD(vs42,vs33,0) + XXSPLTD(vs43,vs33,1) + XXSPLTD(vs44,vs34,0) + XXSPLTD(vs45,vs34,1) + XXSPLTD(vs46,vs35,0) + XXSPLTD(vs47,vs35,1) - xxspltd vs48, vs36, 0 - xxspltd vs49, vs36, 1 - xxspltd vs50, vs37, 0 - xxspltd vs51, vs37, 1 - xxspltd vs52, vs38, 0 - xxspltd vs53, vs38, 1 - xxspltd vs54, vs39, 0 - xxspltd vs55, vs39, 1 + XXSPLTD(vs48,vs36,0) + XXSPLTD(vs49,vs36,1) + XXSPLTD(vs50,vs37,0) + XXSPLTD(vs51,vs37,1) + XXSPLTD(vs52,vs38,0) + XXSPLTD(vs53,vs38,1) + XXSPLTD(vs54,vs39,0) + XXSPLTD(vs55,vs39,1) stxvd2x vs40, o0, BBO stxvd2x vs41, o16, BBO @@ -3160,6 +3684,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs55, o48, BBO addi BBO, BBO, 64 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/zgemm_tcopy_macros_8_power8.S b/kernel/power/zgemm_tcopy_macros_8_power8.S index 3f5a5ed03..654332375 100644 --- a/kernel/power/zgemm_tcopy_macros_8_power8.S +++ b/kernel/power/zgemm_tcopy_macros_8_power8.S @@ -38,7 +38,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* Macros for N=4 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x8', ` +#else .macro COPY_4x8 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -144,14 +148,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs12, o32, T1 stxvd2x vs13, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x4', ` +#else .macro COPY_4x4 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -209,14 +221,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs46, o32, T1 stxvd2x vs47, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x2', ` +#else .macro COPY_4x2 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -254,14 +274,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs38, o32, T1 stxvd2x vs39, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=4 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_4x1', ` +#else .macro COPY_4x1 +#endif lxvd2x vs32, o0, A0 addi A0, A0, 16 @@ -289,14 +317,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvd2x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x8', ` +#else .macro COPY_2x8 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -350,14 +386,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs46, o32, T1 stxvd2x vs47, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x4', ` +#else .macro COPY_2x4 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -387,14 +431,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs38, o32, T1 stxvd2x vs39, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x2', ` +#else .macro COPY_2x2 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -414,14 +466,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvd2x vs34, o32, T1 stxvd2x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_2x1', ` +#else .macro COPY_2x1 +#endif lxvd2x vs32, o0, A0 addi A0, A0, 16 @@ -437,14 +497,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs33, o16, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x8', ` +#else .macro COPY_1x8 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -472,14 +540,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs38, o32, T1 stxvd2x vs39, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x4', ` +#else .macro COPY_1x4 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -495,14 +571,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stxvd2x vs34, o32, T1 stxvd2x vs35, o48, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x2', ` +#else .macro COPY_1x2 +#endif lxvd2x vs32, o0, A0 lxvd2x vs33, o16, A0 @@ -514,14 +598,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs32, o0, T1 stxvd2x vs33, o16, T1 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`COPY_1x1', ` +#else .macro COPY_1x1 +#endif lxvd2x vs32, o0, A0 addi A0, A0, 16 @@ -531,5 +623,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stxvd2x vs32, o0, T1 +#if defined(_AIX) +') +#else .endm +#endif diff --git a/kernel/power/zrot.c b/kernel/power/zrot.c index d45468fd5..c6d666178 100644 --- a/kernel/power/zrot.c +++ b/kernel/power/zrot.c @@ -40,8 +40,8 @@ static void zrot_kernel_4(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT cosA, FLOAT si __asm__ ( - "xxspltd 36, %x[cos], 0 \n\t" // load c to both dwords - "xxspltd 37, %x[sin], 0 \n\t" // load s to both dwords + XXSPLTD_S(36,%x[cos],0) // load c to both dwords + XXSPLTD_S(37,%x[sin],0) // load s to both dwords "lxvd2x 32, 0, %[x_ptr] \n\t" // load x "lxvd2x 33, %[i16], %[x_ptr] \n\t" @@ -57,10 +57,10 @@ static void zrot_kernel_4(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT cosA, FLOAT si "addi %[y_ptr], %[y_ptr], 64 \n\t" "addic. 
%[temp_n], %[temp_n], -4 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvmuldp 40, 32, 36 \n\t" // c * x "xvmuldp 41, 33, 36 \n\t" @@ -124,9 +124,9 @@ static void zrot_kernel_4(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT cosA, FLOAT si "addi %[y_ptr], %[y_ptr], 128 \n\t" "addic. %[temp_n], %[temp_n], -4 \n\t" - "bgt+ 1b \n" + "bgt+ one%= \n" - "2: \n\t" + "two%=: \n\t" "xvmuldp 40, 32, 36 \n\t" // c * x "xvmuldp 41, 33, 36 \n\t" diff --git a/kernel/power/zscal_microk_power8.c b/kernel/power/zscal_microk_power8.c index aba9029a0..567331775 100644 --- a/kernel/power/zscal_microk_power8.c +++ b/kernel/power/zscal_microk_power8.c @@ -58,8 +58,8 @@ static void zscal_kernel_8 (long n, double *x, double alpha_r, double alpha_i) "dcbt 0, %2 \n\t" "xsnegdp 33, %x16 \n\t" // -alpha_i - "xxspltd 32, %x15, 0 \n\t" // alpha_r , alpha_r - "xxmrghd 33, 33, %x16 \n\t" // -alpha_i , alpha_i + XXSPLTD_S(32,%x15,0) // alpha_r , alpha_r + XXMRGHD_S(33,33,%x16) // -alpha_i , alpha_i "lxvd2x 40, 0, %2 \n\t" // x0_r, x0_i "lxvd2x 41, %17, %2 \n\t" @@ -73,10 +73,10 @@ static void zscal_kernel_8 (long n, double *x, double alpha_r, double alpha_i) "addi %2, %2, 128 \n\t" "addic. 
%1, %1, -8 \n\t" - "ble 2f \n\t" + "ble two%= \n\t" - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" "xvmuldp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r "xvmuldp 49, 41, 32 \n\t" @@ -87,14 +87,14 @@ static void zscal_kernel_8 (long n, double *x, double alpha_r, double alpha_i) "xvmuldp %x5, 46, 32 \n\t" "xvmuldp %x6, 47, 32 \n\t" - "xxswapd %x7, 40 \n\t" - "xxswapd %x8, 41 \n\t" - "xxswapd %x9, 42 \n\t" - "xxswapd %x10, 43 \n\t" - "xxswapd %x11, 44 \n\t" - "xxswapd %x12, 45 \n\t" - "xxswapd %x13, 46 \n\t" - "xxswapd %x14, 47 \n\t" + XXSWAPD_S(%x7,40) + XXSWAPD_S(%x8,41) + XXSWAPD_S(%x9,42) + XXSWAPD_S(%x10,43) + XXSWAPD_S(%x11,44) + XXSWAPD_S(%x12,45) + XXSWAPD_S(%x13,46) + XXSWAPD_S(%x14,47) "xvmuldp %x7, %x7, 33 \n\t" // x0_i * -alpha_i, x0_r * alpha_i "xvmuldp %x8, %x8, 33 \n\t" @@ -147,9 +147,9 @@ static void zscal_kernel_8 (long n, double *x, double alpha_r, double alpha_i) "addi %2, %2, 256 \n\t" "addic. %1, %1, -8 \n\t" - "bgt 1b \n" + "bgt one%= \n" - "2: \n\t" + "two%=: \n\t" "xvmuldp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r "xvmuldp 49, 41, 32 \n\t" @@ -160,14 +160,14 @@ static void zscal_kernel_8 (long n, double *x, double alpha_r, double alpha_i) "xvmuldp %x5, 46, 32 \n\t" "xvmuldp %x6, 47, 32 \n\t" - "xxswapd %x7, 40 \n\t" - "xxswapd %x8, 41 \n\t" - "xxswapd %x9, 42 \n\t" - "xxswapd %x10, 43 \n\t" - "xxswapd %x11, 44 \n\t" - "xxswapd %x12, 45 \n\t" - "xxswapd %x13, 46 \n\t" - "xxswapd %x14, 47 \n\t" + XXSWAPD_S(%x7,40) + XXSWAPD_S(%x8,41) + XXSWAPD_S(%x9,42) + XXSWAPD_S(%x10,43) + XXSWAPD_S(%x11,44) + XXSWAPD_S(%x12,45) + XXSWAPD_S(%x13,46) + XXSWAPD_S(%x14,47) "addi %2, %2, -128 \n\t" diff --git a/kernel/power/zswap_microk_power8.c b/kernel/power/zswap_microk_power8.c index 54391ba5d..1e9fbe2cf 100644 --- a/kernel/power/zswap_microk_power8.c +++ b/kernel/power/zswap_microk_power8.c @@ -40,8 +40,8 @@ zswap_kernel_16 (long n, double *x, double *y) { __asm__ ( - ".p2align 5 \n" - "1: \n\t" + ".align 5 \n" + "one%=: \n\t" 
"lxvd2x 32, 0, %4 \n\t" "lxvd2x 33, %5, %4 \n\t" "lxvd2x 34, %6, %4 \n\t" @@ -130,7 +130,7 @@ zswap_kernel_16 (long n, double *x, double *y) "addi %4, %4, 128 \n\t" "addic. %2, %2, -16 \n\t" - "bgt 1b \n" + "bgt one%= \n" "#n=%2 x=%0=%3 y=%1=%4 o16=%5 o32=%6 o48=%7 o64=%8 o80=%9 o96=%10 o112=%11" : diff --git a/kernel/power/ztrmm_macros_8x2_power8.S b/kernel/power/ztrmm_macros_8x2_power8.S index 701ec65c8..b3fbcd220 100644 --- a/kernel/power/ztrmm_macros_8x2_power8.S +++ b/kernel/power/ztrmm_macros_8x2_power8.S @@ -68,7 +68,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Macros for N=2 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x8_1', ` +#else .macro LOAD2x8_1 +#endif lxvdsx vs16, o0, BO // load real part from B lxvdsx vs17, o8, BO // load imag part from B @@ -92,9 +96,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_I1', ` +#else .macro KERNEL2x8_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -152,9 +164,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs63, vs7, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_1', ` +#else .macro KERNEL2x8_1 +#endif xvmaddadp vs32, vs0, vs16 // real*real, imag*real @@ -221,9 +241,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_2', ` +#else .macro KERNEL2x8_2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -289,9 +317,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
addi AO, AO, 64 addi BO, BO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_E2', ` +#else .macro KERNEL2x8_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -329,9 +365,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs63, vs15, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUBI1', ` +#else .macro KERNEL2x8_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -389,9 +433,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs63, vs7, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x8_SUB1', ` +#else .macro KERNEL2x8_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -449,9 +501,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs63, vs7, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x8', ` +#else .macro SAVE2x8 +#endif mr T1, CO @@ -473,13 +533,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -497,13 +557,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -521,13 +581,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs37, vs37 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs37,vs37) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs36 // realA*realB XSFADD_R2 vs0, vs0, vs37 // imagA*imagB - xxswapd vs36, vs36 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs37, vs37 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs36,vs36) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs37,vs37) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs36 // realA*imagB XSFADD_I2 vs1, vs1, vs37 // imagA*realB @@ -545,13 +605,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs39, vs39 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs39,vs39) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs38 // realA*realB XSFADD_R2 vs0, vs0, vs39 // imagA*imagB - xxswapd vs38, vs38 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs39, vs39 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs38,vs38) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs39,vs39) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs38 // realA*imagB XSFADD_I2 vs1, vs1, vs39 // imagA*realB @@ -569,13 +629,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs41, vs41 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs41,vs41) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs40 // realA*realB XSFADD_R2 vs0, vs0, vs41 // imagA*imagB - xxswapd vs40, vs40 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs41, vs41 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs40,vs40) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs41,vs41) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs40 // realA*imagB XSFADD_I2 vs1, vs1, vs41 // imagA*realB @@ -593,13 +653,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs43, vs43 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs43,vs43) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs42 // realA*realB XSFADD_R2 vs0, vs0, vs43 // imagA*imagB - xxswapd vs42, vs42 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs43, vs43 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs42,vs42) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs43,vs43) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs42 // realA*imagB XSFADD_I2 vs1, vs1, vs43 // imagA*realB @@ -617,13 +677,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs45, vs45 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs45,vs45) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs44 // realA*realB XSFADD_R2 vs0, vs0, vs45 // imagA*imagB - xxswapd vs44, vs44 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs45, vs45 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs44,vs44) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs45,vs45) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs44 // realA*imagB XSFADD_I2 vs1, vs1, vs45 // imagA*realB @@ -641,13 +701,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs47, vs47 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs47,vs47) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs46 // realA*realB XSFADD_R2 vs0, vs0, vs47 // imagA*imagB - xxswapd vs46, vs46 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs47, vs47 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs46,vs46) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs47,vs47) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs46 // realA*imagB XSFADD_I2 vs1, vs1, vs47 // imagA*realB @@ -703,13 +763,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs49, vs49 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs49,vs49) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs48 // realA*realB XSFADD_R2 vs0, vs0, vs49 // imagA*imagB - xxswapd vs48, vs48 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs49, vs49 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs48,vs48) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs49,vs49) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs48 // realA*imagB XSFADD_I2 vs1, vs1, vs49 // imagA*realB @@ -727,13 +787,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs51, vs51 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs51,vs51) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs50 // realA*realB XSFADD_R2 vs0, vs0, vs51 // imagA*imagB - xxswapd vs50, vs50 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs51, vs51 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs50,vs50) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs51,vs51) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs50 // realA*imagB XSFADD_I2 vs1, vs1, vs51 // imagA*realB @@ -751,13 +811,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs53, vs53 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs53,vs53) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs52 // realA*realB XSFADD_R2 vs0, vs0, vs53 // imagA*imagB - xxswapd vs52, vs52 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs53, vs53 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs52,vs52) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs53,vs53) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs52 // realA*imagB XSFADD_I2 vs1, vs1, vs53 // imagA*realB @@ -775,13 +835,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs55, vs55 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs55,vs55) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs54 // realA*realB XSFADD_R2 vs0, vs0, vs55 // imagA*imagB - xxswapd vs54, vs54 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs55, vs55 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs54,vs54) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs55,vs55) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs54 // realA*imagB XSFADD_I2 vs1, vs1, vs55 // imagA*realB @@ -799,13 +859,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs57, vs57 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs57,vs57) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs56 // realA*realB XSFADD_R2 vs0, vs0, vs57 // imagA*imagB - xxswapd vs56, vs56 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs57, vs57 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs56,vs56) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs57,vs57) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs56 // realA*imagB XSFADD_I2 vs1, vs1, vs57 // imagA*realB @@ -823,13 +883,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs59, vs59 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs59,vs59) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs58 // realA*realB XSFADD_R2 vs0, vs0, vs59 // imagA*imagB - xxswapd vs58, vs58 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs59, vs59 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs58,vs58) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs59,vs59) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs58 // realA*imagB XSFADD_I2 vs1, vs1, vs59 // imagA*realB @@ -847,13 +907,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs61, vs61 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs61,vs61) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs60 // realA*realB XSFADD_R2 vs0, vs0, vs61 // imagA*imagB - xxswapd vs60, vs60 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs61, vs61 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs60,vs60) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs61,vs61) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs60 // realA*imagB XSFADD_I2 vs1, vs1, vs61 // imagA*realB @@ -871,13 +931,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs63, vs63 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs63,vs63) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs62 // realA*realB XSFADD_R2 vs0, vs0, vs63 // imagA*imagB - xxswapd vs62, vs62 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs63, vs63 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs62,vs62) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs63,vs63) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs62 // realA*imagB XSFADD_I2 vs1, vs1, vs63 // imagA*realB @@ -918,14 +978,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
add T2, T2, LDC addi CO, CO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x4_1', ` +#else .macro LOAD2x4_1 +#endif lxvdsx vs16, o0, BO // load real part from B lxvdsx vs17, o8, BO // load imag part from B @@ -942,9 +1010,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_I1', ` +#else .macro KERNEL2x4_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -979,9 +1055,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs47, vs3, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_1', ` +#else .macro KERNEL2x4_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -1016,9 +1100,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs3, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_2', ` +#else .macro KERNEL2x4_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1053,9 +1145,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs11, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_E2', ` +#else .macro KERNEL2x4_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -1077,9 +1177,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs47, vs11, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUBI1', ` +#else .macro KERNEL2x4_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1114,9 +1222,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs47, vs3, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x4_SUB1', ` +#else .macro KERNEL2x4_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1151,9 +1267,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs3, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x4', ` +#else .macro SAVE2x4 +#endif mr T1, CO @@ -1170,13 +1294,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -1194,13 +1318,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -1218,13 +1342,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs37, vs37 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs37,vs37) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs36 // realA*realB XSFADD_R2 vs0, vs0, vs37 // imagA*imagB - xxswapd vs36, vs36 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs37, vs37 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs36,vs36) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs37,vs37) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs36 // realA*imagB XSFADD_I2 vs1, vs1, vs37 // imagA*realB @@ -1242,13 +1366,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs39, vs39 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs39,vs39) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs38 // realA*realB XSFADD_R2 vs0, vs0, vs39 // imagA*imagB - xxswapd vs38, vs38 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs39, vs39 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs38,vs38) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs39,vs39) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs38 // realA*imagB XSFADD_I2 vs1, vs1, vs39 // imagA*realB @@ -1291,13 +1415,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs41, vs41 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs41,vs41) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs40 // realA*realB XSFADD_R2 vs0, vs0, vs41 // imagA*imagB - xxswapd vs40, vs40 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs41, vs41 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs40,vs40) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs41,vs41) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs40 // realA*imagB XSFADD_I2 vs1, vs1, vs41 // imagA*realB @@ -1315,13 +1439,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs43, vs43 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs43,vs43) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs42 // realA*realB XSFADD_R2 vs0, vs0, vs43 // imagA*imagB - xxswapd vs42, vs42 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs43, vs43 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs42,vs42) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs43,vs43) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs42 // realA*imagB XSFADD_I2 vs1, vs1, vs43 // imagA*realB @@ -1339,13 +1463,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs45, vs45 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs45,vs45) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs44 // realA*realB XSFADD_R2 vs0, vs0, vs45 // imagA*imagB - xxswapd vs44, vs44 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs45, vs45 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs44,vs44) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs45,vs45) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs44 // realA*imagB XSFADD_I2 vs1, vs1, vs45 // imagA*realB @@ -1363,13 +1487,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs47, vs47 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs47,vs47) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs46 // realA*realB XSFADD_R2 vs0, vs0, vs47 // imagA*imagB - xxswapd vs46, vs46 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs47, vs47 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs46,vs46) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs47,vs47) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs46 // realA*imagB XSFADD_I2 vs1, vs1, vs47 // imagA*realB @@ -1401,14 +1525,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add T1, T1, LDC addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x2_1', ` +#else .macro LOAD2x2_1 +#endif lxvdsx vs16, o0, BO // load real part from B lxvdsx vs17, o8, BO // load imag part from B @@ -1423,9 +1555,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_I1', ` +#else .macro KERNEL2x2_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -1450,9 +1590,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs39, vs1, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_1', ` +#else .macro KERNEL2x2_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -1477,9 +1625,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs39, vs1, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_2', ` +#else .macro KERNEL2x2_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1504,9 +1660,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs9, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_E2', ` +#else .macro KERNEL2x2_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -1520,9 +1684,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs9, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUBI1', ` +#else .macro KERNEL2x2_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1547,9 +1719,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs39, vs1, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x2_SUB1', ` +#else .macro KERNEL2x2_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -1574,9 +1754,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs1, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x2', ` +#else .macro SAVE2x2 +#endif mr T1, CO @@ -1591,13 +1779,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -1615,13 +1803,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -1658,13 +1846,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs37, vs37 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs37,vs37) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs36 // realA*realB XSFADD_R2 vs0, vs0, vs37 // imagA*imagB - xxswapd vs36, vs36 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs37, vs37 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs36,vs36) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs37,vs37) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs36 // realA*imagB XSFADD_I2 vs1, vs1, vs37 // imagA*realB @@ -1682,13 +1870,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs39, vs39 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs39,vs39) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs38 // realA*realB XSFADD_R2 vs0, vs0, vs39 // imagA*imagB - xxswapd vs38, vs38 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs39, vs39 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs38,vs38) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs39,vs39) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs38 // realA*imagB XSFADD_I2 vs1, vs1, vs39 // imagA*realB @@ -1716,14 +1904,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
add T1, T1, LDC addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=2 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD2x1_1', ` +#else .macro LOAD2x1_1 +#endif lxvdsx vs16, o0, BO // load real part from B lxvdsx vs17, o8, BO // load imag part from B @@ -1737,9 +1933,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_I1', ` +#else .macro KERNEL2x1_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A @@ -1759,9 +1963,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs35, vs0, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_1', ` +#else .macro KERNEL2x1_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A @@ -1781,9 +1993,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs0, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_2', ` +#else .macro KERNEL2x1_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -1803,9 +2023,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs8, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_E2', ` +#else .macro KERNEL2x1_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -1815,9 +2043,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs35, vs8, vs23 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUBI1', ` +#else .macro KERNEL2x1_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -1837,9 +2073,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs35, vs0, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL2x1_SUB1', ` +#else .macro KERNEL2x1_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -1859,9 +2103,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs0, vs19 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE2x1', ` +#else .macro SAVE2x1 +#endif mr T1, CO @@ -1875,13 +2127,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -1915,13 +2167,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -1947,14 +2199,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add T1, T1, LDC addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=8 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x8_1', ` +#else .macro LOAD1x8_1 +#endif lxvdsx vs16, o0, BO // load real part from B lxvdsx vs17, o8, BO // load imag part from B @@ -1976,9 +2236,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_I1', ` +#else .macro KERNEL1x8_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -2017,9 +2285,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs47, vs7, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_1', ` +#else .macro KERNEL1x8_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -2058,9 +2334,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs47, vs7, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_2', ` +#else .macro KERNEL1x8_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2099,9 +2383,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs15, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_E2', ` +#else .macro KERNEL1x8_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -2122,9 +2414,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs15, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUBI1', ` +#else .macro KERNEL1x8_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2163,9 +2463,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs47, vs7, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x8_SUB1', ` +#else .macro KERNEL1x8_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2204,9 +2512,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs47, vs7, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x8', ` +#else .macro SAVE1x8 +#endif mr T1, CO @@ -2228,13 +2544,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -2252,13 +2568,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -2276,13 +2592,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs37, vs37 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs37,vs37) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs36 // realA*realB XSFADD_R2 vs0, vs0, vs37 // imagA*imagB - xxswapd vs36, vs36 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs37, vs37 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs36,vs36) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs37,vs37) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs36 // realA*imagB XSFADD_I2 vs1, vs1, vs37 // imagA*realB @@ -2300,13 +2616,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs39, vs39 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs39,vs39) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs38 // realA*realB XSFADD_R2 vs0, vs0, vs39 // imagA*imagB - xxswapd vs38, vs38 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs39, vs39 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs38,vs38) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs39,vs39) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs38 // realA*imagB XSFADD_I2 vs1, vs1, vs39 // imagA*realB @@ -2324,13 +2640,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs41, vs41 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs41,vs41) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs40 // realA*realB XSFADD_R2 vs0, vs0, vs41 // imagA*imagB - xxswapd vs40, vs40 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs41, vs41 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs40,vs40) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs41,vs41) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs40 // realA*imagB XSFADD_I2 vs1, vs1, vs41 // imagA*realB @@ -2348,13 +2664,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs43, vs43 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs43,vs43) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs42 // realA*realB XSFADD_R2 vs0, vs0, vs43 // imagA*imagB - xxswapd vs42, vs42 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs43, vs43 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs42,vs42) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs43,vs43) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs42 // realA*imagB XSFADD_I2 vs1, vs1, vs43 // imagA*realB @@ -2372,13 +2688,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs45, vs45 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs45,vs45) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs44 // realA*realB XSFADD_R2 vs0, vs0, vs45 // imagA*imagB - xxswapd vs44, vs44 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs45, vs45 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs44,vs44) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs45,vs45) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs44 // realA*imagB XSFADD_I2 vs1, vs1, vs45 // imagA*realB @@ -2396,13 +2712,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs47, vs47 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs47,vs47) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs46 // realA*realB XSFADD_R2 vs0, vs0, vs47 // imagA*imagB - xxswapd vs46, vs46 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs47, vs47 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs46,vs46) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs47,vs47) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs46 // realA*imagB XSFADD_I2 vs1, vs1, vs47 // imagA*realB @@ -2443,14 +2759,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
add T2, T2, LDC addi CO, CO, 128 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=4 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x4_1', ` +#else .macro LOAD1x4_1 +#endif lxvdsx vs16, o0, BO // load real part from B lxvdsx vs17, o8, BO // load imag part from B @@ -2465,9 +2789,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 64 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_I1', ` +#else .macro KERNEL1x4_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -2491,9 +2823,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs39, vs3, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_1', ` +#else .macro KERNEL1x4_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -2517,9 +2857,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs3, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_2', ` +#else .macro KERNEL1x4_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2543,9 +2891,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs11, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_E2', ` +#else .macro KERNEL1x4_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -2558,9 +2914,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs39, vs11, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUBI1', ` +#else .macro KERNEL1x4_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2584,9 +2948,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs39, vs3, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x4_SUB1', ` +#else .macro KERNEL1x4_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2610,9 +2982,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs39, vs3, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x4', ` +#else .macro SAVE1x4 +#endif mr T1, CO @@ -2629,13 +3009,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -2653,13 +3033,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -2677,13 +3057,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs37, vs37 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs37,vs37) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs36 // realA*realB XSFADD_R2 vs0, vs0, vs37 // imagA*imagB - xxswapd vs36, vs36 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs37, vs37 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs36,vs36) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs37,vs37) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs36 // realA*imagB XSFADD_I2 vs1, vs1, vs37 // imagA*realB @@ -2701,13 +3081,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs39, vs39 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs39,vs39) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs38 // realA*realB XSFADD_R2 vs0, vs0, vs39 // imagA*imagB - xxswapd vs38, vs38 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs39, vs39 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs38,vs38) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs39,vs39) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs38 // realA*imagB XSFADD_I2 vs1, vs1, vs39 // imagA*realB @@ -2739,14 +3119,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add T1, T1, LDC addi CO, CO, 64 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=2 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x2_1', ` +#else .macro LOAD1x2_1 +#endif lxvdsx vs16, o0, BO // load real part from B lxvdsx vs17, o8, BO // load imag part from B @@ -2759,9 +3147,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 32 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_I1', ` +#else .macro KERNEL1x2_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -2779,9 +3175,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs35, vs1, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_1', ` +#else .macro KERNEL1x2_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A lxvd2x vs9, o16, AO // load real,imag from A @@ -2799,9 +3203,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmaddadp vs35, vs1, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_2', ` +#else .macro KERNEL1x2_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2819,9 +3231,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs9, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_E2', ` +#else .macro KERNEL1x2_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real @@ -2830,9 +3250,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs9, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUBI1', ` +#else .macro KERNEL1x2_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2850,9 +3278,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs35, vs1, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x2_SUB1', ` +#else .macro KERNEL1x2_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A lxvd2x vs1, o16, AO // load real,imag from A @@ -2870,9 +3306,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs35, vs1, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x2', ` +#else .macro SAVE1x2 +#endif mr T1, CO @@ -2887,13 +3331,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -2911,13 +3355,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs35, vs35 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs35,vs35) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs34 // realA*realB XSFADD_R2 vs0, vs0, vs35 // imagA*imagB - xxswapd vs34, vs34 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs35, vs35 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs34,vs34) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs35,vs35) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs34 // realA*imagB XSFADD_I2 vs1, vs1, vs35 // imagA*realB @@ -2945,14 +3389,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
add T1, T1, LDC addi CO, CO, 32 +#if defined(_AIX) +') +#else .endm +#endif /********************************************************************************************** * Macros for N=1 and M=1 **********************************************************************************************/ +#if defined(_AIX) +define(`LOAD1x1_1', ` +#else .macro LOAD1x1_1 +#endif lxvdsx vs16, o0, BO // load real part from B lxvdsx vs17, o8, BO // load imag part from B @@ -2964,9 +3416,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi AO, AO, 16 +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_I1', ` +#else .macro KERNEL1x1_I1 +#endif lxvd2x vs8, o0, AO // load real,imag from A @@ -2981,9 +3441,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmuldp vs33, vs0, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_1', ` +#else .macro KERNEL1x1_1 +#endif lxvd2x vs8, o0, AO // load real,imag from A @@ -2998,9 +3466,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs33, vs0, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_2', ` +#else .macro KERNEL1x1_2 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -3015,18 +3491,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs33, vs8, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_E2', ` +#else .macro KERNEL1x1_E2 +#endif xvmaddadp vs32, vs8, vs20 // real*real, imag*real xvmaddadp vs33, vs8, vs21 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUBI1', ` +#else .macro KERNEL1x1_SUBI1 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -3041,9 +3533,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvmuldp vs33, vs0, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`KERNEL1x1_SUB1', ` +#else .macro KERNEL1x1_SUB1 +#endif lxvd2x vs0, o0, AO // load real,imag from A @@ -3058,9 +3558,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvmaddadp vs33, vs0, vs17 // real*imag, imag*imag +#if defined(_AIX) +') +#else .endm +#endif +#if defined(_AIX) +define(`SAVE1x1', ` +#else .macro SAVE1x1 +#endif mr T1, CO @@ -3074,13 +3582,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xxlxor vs0, vs0, vs0 xxlxor vs1, vs1, vs1 - xxswapd vs33, vs33 // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB + XXSWAPD(vs33,vs33) // realA*imagB, imagA*imagB -> imagA*imagB, realA*imagB XSFADD_R1 vs0, vs0, vs32 // realA*realB XSFADD_R2 vs0, vs0, vs33 // imagA*imagB - xxswapd vs32, vs32 // realA*realB, imagA*realB -> imagA*realB, realA*realB - xxswapd vs33, vs33 // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB + XXSWAPD(vs32,vs32) // realA*realB, imagA*realB -> imagA*realB, realA*realB + XXSWAPD(vs33,vs33) // imagA*imagB, realA*imagB -> realA*imagB, imagA*imagB XSFADD_I1 vs1, vs1, vs32 // realA*imagB XSFADD_I2 vs1, vs1, vs33 // imagA*realB @@ -3106,5 +3614,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add T1, T1, LDC addi CO, CO, 16 +#if defined(_AIX) +') +#else .endm +#endif