Print the wall time (in cycles) when FUNCTION_PROFILE is enabled.
This commit is contained in:
parent
4335bca2f7
commit
1496383224
|
@ -18,6 +18,7 @@ common:
|
||||||
* Added openblas_set_num_threads for Fortran.
|
* Added openblas_set_num_threads for Fortran.
|
||||||
* Fixed #25 a wrong result of rotmg.
|
* Fixed #25 a wrong result of rotmg.
|
||||||
* Fixed a bug about detecting underscore prefix in c_check.
|
* Fixed a bug about detecting underscore prefix in c_check.
|
||||||
|
* Print the wall time (in cycles) when FUNCTION_PROFILE is enabled.
|
||||||
|
|
||||||
x86/x86_64:
|
x86/x86_64:
|
||||||
* Fixed #28 a wrong result of dsdot on x86_64.
|
* Fixed #28 a wrong result of dsdot on x86_64.
|
||||||
|
|
|
@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) {
|
||||||
if (cycles > 0) {
|
if (cycles > 0) {
|
||||||
|
|
||||||
fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n");
|
fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n");
|
||||||
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n");
|
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n");
|
||||||
|
|
||||||
for (i = 0; i < MAX_PROF_TABLE; i ++) {
|
for (i = 0; i < MAX_PROF_TABLE; i ++) {
|
||||||
if (function_profile_table[i].calls) {
|
if (function_profile_table[i].calls) {
|
||||||
#ifndef OS_WINDOWS
|
#ifndef OS_WINDOWS
|
||||||
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n",
|
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n",
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n",
|
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n",
|
||||||
#endif
|
#endif
|
||||||
func_table[i],
|
func_table[i],
|
||||||
function_profile_table[i].calls,
|
function_profile_table[i].calls,
|
||||||
(double)function_profile_table[i].cycles / (double)cycles * 100.,
|
(double)function_profile_table[i].cycles / (double)cycles * 100.,
|
||||||
(double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100.,
|
(double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100.,
|
||||||
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles
|
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles,
|
||||||
|
function_profile_table[i].cycles
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue