From a7f73c764cee8fada4f7f359ae4a8be6b9810ada Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Fri, 27 Oct 2023 16:48:47 +0200
Subject: [PATCH 1/7] Clarify "make" options and the need to repeat them in the
 install step

---
 README.md | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 081d45870..f57cec831 100644
--- a/README.md
+++ b/README.md
@@ -54,10 +54,15 @@ Building OpenBLAS requires the following to be installed:
 
 Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
 To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
-The full target list is in the file `TargetList.txt`. For building with `cmake`, the
-usual conventions apply, i.e. create a build directory either underneath the toplevel
-OpenBLAS source directory or separate from it, and invoke `cmake` there with the path
-to the source tree and any build options you plan to set.
+The full target list is in the file `TargetList.txt`. Other build options are documented in `Makefile.rule` and
+can either be set there (typically by removing the comment character from the respective line), or used on the
+`make` command line. 
+Note that when you run `make install` after building, you need to repeat all command line options you provided to `make`
+in the build step, as some settings like the supported maximum number of threads are automatically derived from the
+build host by default, which might not be what you want.
+For building with `cmake`, the usual conventions apply, i.e. create a build directory either underneath the toplevel
+OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any 
+build options you plan to set.
 
 ### Cross compile
 

From f5e1f20f4db408d826cb89638175f1987304cf5b Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Fri, 27 Oct 2023 17:10:37 +0200
Subject: [PATCH 2/7] Update target list

---
 README.md | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index f57cec831..3c4e38f18 100644
--- a/README.md
+++ b/README.md
@@ -122,7 +122,7 @@ Use `PREFIX=` when invoking `make`, for example
 ```sh
 make install PREFIX=your_installation_directory
 ```
-
+(along with all options you added on the `make` command line in the preceding build step).
 The default installation directory is `/opt/OpenBLAS`.
 
 ## Supported CPUs and Operating Systems
@@ -142,7 +142,7 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
 - **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
 - **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
 - **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
-- **AMD ZEN**: Uses Haswell codes with some optimizations.
+- **AMD ZEN**: Uses Haswell codes with some optimizations for Zen 2/3 (use SkylakeX for Zen 4).
 
 #### MIPS32
 
@@ -174,13 +174,16 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
 - **TSV110**: Optimized some Level-3 helper functions
 - **EMAG 8180**: preliminary support based on A57
 - **Neoverse N1**: (AWS Graviton2) preliminary support
-- **Apple Vortex**: preliminary support based on ARMV8
+- **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS
+- **Apple Vortex**: preliminary support based on ThunderX2/3
+- **A64FX**: preliminary support, optimized Level-3 BLAS
+- **ARMV8SVE**: any ARMV8 CPU with SVE extensions
 
 #### PPC/PPC64
 
 - **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
 - **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only. 
-- **POWER10**:
+- **POWER10**: Optimized Level-3 BLAS including SBGEMM and some Level-1,2.
 
 #### IBM zEnterprise System
 

From 1cec1c0fc7509a949b65ce5bb50696c18838046e Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Sat, 28 Oct 2023 14:43:19 +0200
Subject: [PATCH 3/7] Add FreeBSD clang/gfortran build with OpenMP

---
 .cirrus.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.cirrus.yml b/.cirrus.yml
index 02cd40997..c405b958d 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -148,6 +148,15 @@ FreeBSD_task:
   - ls -l /usr/local/lib
   - gmake CC=gcc INTERFACE64=1
 
+FreeBSD_task:
+  name: FreeBSD-clang-openmp
+  freebsd_instance:
+    image_family: freebsd-13-2
+  install_script:
+  - pkg update -f && pkg upgrade -y && pkg install -y gmake gcc 
+  compile_script:
+  - gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1
+
 #task:
 #  name: Windows/LLVM16     --- too slow ---
 #  windows_container:

From 289a5f6d9b8570de6fa5c2bf2789e04abce494ea Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Sat, 28 Oct 2023 18:44:58 +0200
Subject: [PATCH 4/7] work around libgfortran install issue on FreeBSD

---
 .cirrus.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.cirrus.yml b/.cirrus.yml
index c405b958d..5a1f2cfda 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -154,6 +154,7 @@ FreeBSD_task:
     image_family: freebsd-13-2
   install_script:
   - pkg update -f && pkg upgrade -y && pkg install -y gmake gcc 
+  - ln -s /usr/local/gcc12/lib/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
   compile_script:
   - gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1
 

From dc1c880782e33307aaa2b04467b110003f3305e1 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Sat, 28 Oct 2023 23:14:36 +0200
Subject: [PATCH 5/7] fix libgfortran path on bsd

---
 .cirrus.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.cirrus.yml b/.cirrus.yml
index 5a1f2cfda..6c2baf8a0 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -154,7 +154,7 @@ FreeBSD_task:
     image_family: freebsd-13-2
   install_script:
   - pkg update -f && pkg upgrade -y && pkg install -y gmake gcc 
-  - ln -s /usr/local/gcc12/lib/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
+  - ln -s /usr/local/lib/gcc12/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
   compile_script:
   - gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1
 

From d003ad630b1792f169373b8ab35c5ea7a6dfdccd Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 31 Oct 2023 10:26:38 +0100
Subject: [PATCH 6/7] Increase the default GEMM buffer size on modern ARM
 server cpus

---
 common_arm64.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/common_arm64.h b/common_arm64.h
index 436ccb8f5..1e593c66f 100644
--- a/common_arm64.h
+++ b/common_arm64.h
@@ -162,7 +162,11 @@ REALNAME:
 #define HUGE_PAGESIZE   ( 4 << 20)
 
 #ifndef BUFFERSIZE
+if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE)
+#define BUFFER_SIZE     (32 << 22)
+else
 #define BUFFER_SIZE     (32 << 20)
+#endif
 #else
 #define BUFFER_SIZE	(32 << BUFFERSIZE)
 #endif

From 728788f6676bb5e999cdf4fbcda9e2c7b8b9cd53 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 31 Oct 2023 11:08:22 +0100
Subject: [PATCH 7/7] typo fix

---
 common_arm64.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/common_arm64.h b/common_arm64.h
index 1e593c66f..6ae6a35a3 100644
--- a/common_arm64.h
+++ b/common_arm64.h
@@ -162,9 +162,9 @@ REALNAME:
 #define HUGE_PAGESIZE   ( 4 << 20)
 
 #ifndef BUFFERSIZE
-if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE)
+#if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE)
 #define BUFFER_SIZE     (32 << 22)
-else
+#else
 #define BUFFER_SIZE     (32 << 20)
 #endif
 #else