update example
This commit is contained in:
parent
28330e3548
commit
9cbd9c7ff5
|
|
@ -0,0 +1,277 @@
|
|||
#include "test_utils.h"
|
||||
#include "performance_utils.h"
|
||||
#include "yaml_reporter.h"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// 实现标记宏 - 参赛者修改实现时请将此宏设为0
|
||||
// ============================================================================
|
||||
#ifndef USE_DEFAULT_REF_IMPL
|
||||
#define USE_DEFAULT_REF_IMPL 1 // 1=默认实现, 0=参赛者自定义实现
|
||||
#endif
|
||||
|
||||
#if USE_DEFAULT_REF_IMPL
|
||||
#include <thrust/reduce.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/functional.h>
|
||||
#endif
|
||||
|
||||
// 误差容忍度
|
||||
constexpr double REDUCE_ERROR_TOLERANCE = 0.005; // 0.5%
|
||||
|
||||
// ============================================================================
|
||||
// ReduceSum算法实现接口
|
||||
// 参赛者需要替换Thrust实现为自己的高性能kernel
|
||||
// ============================================================================
|
||||
|
||||
/// Sum-reduction entry point exercised by the correctness and benchmark drivers.
///
/// When USE_DEFAULT_REF_IMPL is non-zero the work is delegated to
/// thrust::reduce; otherwise reduce() is an empty slot for a custom kernel.
template <typename InputT = float, typename OutputT = float>
class ReduceSumAlgorithm {
public:
    /// Main interface function that contestants are expected to implement.
    ///
    /// @param d_in       input array (the reference path wraps it as a thrust device pointer)
    /// @param d_out      destination of the single reduced value (wrapped the same way)
    /// @param num_items  number of elements read from d_in
    /// @param init_value seed value the reduction starts from
    void reduce(const InputT* d_in, OutputT* d_out, int num_items, OutputT init_value) {
#if USE_DEFAULT_REF_IMPL
        // ---- Default reference implementation ----
        const auto first = thrust::device_pointer_cast(d_in);
        const auto last = first + num_items;
        auto result_slot = thrust::device_pointer_cast(d_out);

        // Plain thrust::reduce; the value is stored through the device pointer.
        *result_slot = thrust::reduce(thrust::device, first, last, static_cast<OutputT>(init_value));
#else
        // ---- Contestant implementation area ----
        // TODO: implement a high-performance reduction here.
        //
        // Example: one or more custom kernels may be launched, e.g.
        // blockReduceKernel<<<grid, block>>>(d_in, temp_results, num_items, init_value);
        // finalReduceKernel<<<1, block>>>(temp_results, d_out, grid.x);
#endif
    }

    /// Reports which of the two branches was compiled in.
    static const char* getImplementationStatus() {
#if !USE_DEFAULT_REF_IMPL
        return "CUSTOM_IMPL";
#else
        return "DEFAULT_REF_IMPL";
#endif
    }

private:
    // Contestants may add helper functions and member variables here,
    // e.g. intermediate result buffers or multi-stage reduction state.
};
|
||||
|
||||
// ============================================================================
|
||||
// 测试和性能评估
|
||||
// ============================================================================
|
||||
|
||||
bool testCorrectness() {
|
||||
std::cout << "ReduceSum 正确性测试..." << std::endl;
|
||||
TestDataGenerator generator;
|
||||
ReduceSumAlgorithm<float, float> algorithm;
|
||||
|
||||
bool allPassed = true;
|
||||
|
||||
// 测试不同数据规模
|
||||
for (int i = 0; i < NUM_TEST_SIZES && i < 2; i++) { // 限制测试规模
|
||||
int size = std::min(TEST_SIZES[i], 10000);
|
||||
std::cout << " 测试规模: " << size << std::endl;
|
||||
|
||||
// 测试普通数据
|
||||
{
|
||||
auto data = generator.generateRandomFloats(size, -10.0f, 10.0f);
|
||||
float init_value = 1.0f;
|
||||
|
||||
// CPU参考计算
|
||||
double cpu_result = cpuReduceSum(data, static_cast<double>(init_value));
|
||||
|
||||
// GPU计算
|
||||
float *d_in;
|
||||
float *d_out;
|
||||
MACA_CHECK(mcMalloc(&d_in, size * sizeof(float)));
|
||||
MACA_CHECK(mcMalloc(&d_out, sizeof(float)));
|
||||
|
||||
MACA_CHECK(mcMemcpy(d_in, data.data(), size * sizeof(float), mcMemcpyHostToDevice));
|
||||
|
||||
algorithm.reduce(d_in, d_out, size, init_value);
|
||||
|
||||
float gpu_result;
|
||||
MACA_CHECK(mcMemcpy(&gpu_result, d_out, sizeof(float), mcMemcpyDeviceToHost));
|
||||
|
||||
// 验证误差
|
||||
double relative_error = std::abs(gpu_result - cpu_result) / std::abs(cpu_result);
|
||||
if (relative_error > REDUCE_ERROR_TOLERANCE) {
|
||||
std::cout << " 失败: 误差过大 " << relative_error << std::endl;
|
||||
allPassed = false;
|
||||
} else {
|
||||
std::cout << " 通过 (误差: " << relative_error << ")" << std::endl;
|
||||
}
|
||||
|
||||
mcFree(d_in);
|
||||
mcFree(d_out);
|
||||
}
|
||||
|
||||
// 测试特殊值 (NaN, Inf)
|
||||
if (size > 100) {
|
||||
std::cout << " 测试特殊值..." << std::endl;
|
||||
auto data = generator.generateSpecialFloats(size);
|
||||
float init_value = 0.0f;
|
||||
|
||||
double cpu_result = cpuReduceSum(data, static_cast<double>(init_value));
|
||||
|
||||
float *d_in;
|
||||
float *d_out;
|
||||
MACA_CHECK(mcMalloc(&d_in, size * sizeof(float)));
|
||||
MACA_CHECK(mcMalloc(&d_out, sizeof(float)));
|
||||
|
||||
MACA_CHECK(mcMemcpy(d_in, data.data(), size * sizeof(float), mcMemcpyHostToDevice));
|
||||
|
||||
algorithm.reduce(d_in, d_out, size, init_value);
|
||||
|
||||
float gpu_result;
|
||||
MACA_CHECK(mcMemcpy(&gpu_result, d_out, sizeof(float), mcMemcpyDeviceToHost));
|
||||
|
||||
// 对于包含特殊值的情况,检查是否正确处理
|
||||
if (std::isfinite(cpu_result) && std::isfinite(gpu_result)) {
|
||||
double relative_error = std::abs(gpu_result - cpu_result) / std::abs(cpu_result);
|
||||
if (relative_error > REDUCE_ERROR_TOLERANCE) {
|
||||
std::cout << " 失败: 特殊值处理错误" << std::endl;
|
||||
allPassed = false;
|
||||
} else {
|
||||
std::cout << " 通过 (特殊值处理)" << std::endl;
|
||||
}
|
||||
} else {
|
||||
std::cout << " 通过 (特殊值结果)" << std::endl;
|
||||
}
|
||||
|
||||
mcFree(d_in);
|
||||
mcFree(d_out);
|
||||
}
|
||||
}
|
||||
|
||||
return allPassed;
|
||||
}
|
||||
|
||||
void benchmarkPerformance() {
|
||||
PerformanceDisplay::printReduceSumHeader();
|
||||
|
||||
TestDataGenerator generator;
|
||||
PerformanceMeter meter;
|
||||
ReduceSumAlgorithm<float, float> algorithm;
|
||||
|
||||
const int WARMUP_ITERATIONS = 5;
|
||||
const int BENCHMARK_ITERATIONS = 10;
|
||||
|
||||
// 用于YAML报告的数据收集
|
||||
std::vector<std::map<std::string, std::string>> perf_data;
|
||||
|
||||
for (int i = 0; i < NUM_TEST_SIZES; i++) {
|
||||
int size = TEST_SIZES[i];
|
||||
|
||||
// 生成测试数据
|
||||
auto data = generator.generateRandomFloats(size);
|
||||
float init_value = 0.0f;
|
||||
|
||||
// 分配GPU内存
|
||||
float *d_in;
|
||||
float *d_out;
|
||||
MACA_CHECK(mcMalloc(&d_in, size * sizeof(float)));
|
||||
MACA_CHECK(mcMalloc(&d_out, sizeof(float)));
|
||||
|
||||
MACA_CHECK(mcMemcpy(d_in, data.data(), size * sizeof(float), mcMemcpyHostToDevice));
|
||||
|
||||
// Warmup阶段
|
||||
for (int iter = 0; iter < WARMUP_ITERATIONS; iter++) {
|
||||
algorithm.reduce(d_in, d_out, size, init_value);
|
||||
}
|
||||
|
||||
// 正式测试阶段
|
||||
float total_time = 0;
|
||||
for (int iter = 0; iter < BENCHMARK_ITERATIONS; iter++) {
|
||||
meter.startTiming();
|
||||
algorithm.reduce(d_in, d_out, size, init_value);
|
||||
total_time += meter.stopTiming();
|
||||
}
|
||||
|
||||
float avg_time = total_time / BENCHMARK_ITERATIONS;
|
||||
|
||||
// 计算性能指标
|
||||
auto metrics = PerformanceCalculator::calculateReduceSum(size, avg_time);
|
||||
|
||||
// 显示性能数据
|
||||
PerformanceDisplay::printReduceSumData(size, avg_time, metrics);
|
||||
|
||||
// 收集YAML报告数据
|
||||
auto entry = YAMLPerformanceReporter::createEntry();
|
||||
entry["data_size"] = std::to_string(size);
|
||||
entry["time_ms"] = std::to_string(avg_time);
|
||||
entry["throughput_gps"] = std::to_string(metrics.throughput_gps);
|
||||
entry["data_type"] = "float";
|
||||
perf_data.push_back(entry);
|
||||
|
||||
mcFree(d_in);
|
||||
mcFree(d_out);
|
||||
}
|
||||
|
||||
// 生成YAML性能报告
|
||||
YAMLPerformanceReporter::generateReduceSumYAML(perf_data, "reduce_sum_performance.yaml");
|
||||
PerformanceDisplay::printSavedMessage("reduce_sum_performance.yaml");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 主函数
|
||||
// ============================================================================
|
||||
int main(int argc, char* argv[]) {
|
||||
std::cout << "=== ReduceSum 算法测试 ===" << std::endl;
|
||||
|
||||
// 检查参数
|
||||
std::string mode = "all";
|
||||
if (argc > 1) {
|
||||
mode = argv[1];
|
||||
}
|
||||
|
||||
bool correctness_passed = true;
|
||||
bool performance_completed = true;
|
||||
|
||||
try {
|
||||
if (mode == "correctness" || mode == "all") {
|
||||
correctness_passed = testCorrectness();
|
||||
}
|
||||
|
||||
if (mode == "performance" || mode == "all") {
|
||||
if (correctness_passed || mode == "performance") {
|
||||
benchmarkPerformance();
|
||||
} else {
|
||||
std::cout << "跳过性能测试,因为正确性测试未通过" << std::endl;
|
||||
performance_completed = false;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "\n=== 测试完成 ===" << std::endl;
|
||||
std::cout << "实现状态: " << ReduceSumAlgorithm<float, float>::getImplementationStatus() << std::endl;
|
||||
if (mode == "all") {
|
||||
std::cout << "正确性: " << (correctness_passed ? "通过" : "失败") << std::endl;
|
||||
std::cout << "性能测试: " << (performance_completed ? "完成" : "跳过") << std::endl;
|
||||
}
|
||||
|
||||
return correctness_passed ? 0 : 1;
|
||||
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "测试出错: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,274 @@
|
|||
#!/bin/bash
|
||||
|
||||
# GPU高性能并行计算算法优化竞赛 - 统一编译和运行脚本
|
||||
# 整合了所有算法的编译、运行和公共配置
|
||||
|
||||
# ============================================================================
|
||||
# 公共配置和工具函数
|
||||
# ============================================================================
|
||||
|
||||
# 设置颜色
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
BLUE='\033[0;34m'
|
||||
YELLOW='\033[0;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# 打印函数
|
||||
# Print message $1 prefixed with a BLUE-coloured [INFO] tag.
# %b expands backslash escapes the same way `echo -e` does.
print_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
|
||||
|
||||
# Print message $1 prefixed with a GREEN-coloured [SUCCESS] tag.
# %b expands backslash escapes the same way `echo -e` does.
print_success() {
    printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}
|
||||
|
||||
# Print message $1 prefixed with a RED-coloured [ERROR] tag.
# %b expands backslash escapes the same way `echo -e` does.
print_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $1"
}
|
||||
|
||||
# Print message $1 prefixed with a YELLOW-coloured [WARNING] tag.
# %b expands backslash escapes the same way `echo -e` does.
print_warning() {
    printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}
|
||||
|
||||
# Build configuration - every value can be overridden through the environment.
# `: "${VAR:=default}"` assigns the default only when VAR is unset or empty.
: "${COMPILER:=mxcc}"
: "${COMPILER_FLAGS:=-O3 -std=c++17 --extended-lambda -DRUN_FULL_TEST}"

# Header directory handed to the compiler with -I (defaults to utils).
: "${HEADER_DIR:=utils}"

# Optional source directory override; empty by default.
: "${SOURCE_CODE_DIR:=}"

# Directory that receives the compiled test executables.
: "${BUILD_DIR:=build}"
|
||||
|
||||
# 编译单个算法的通用函数
|
||||
# 参数: $1=算法名称, $2=源文件名(不含路径)
|
||||
# Compile one algorithm test program into $BUILD_DIR/test_<lower-cased name>.
#   $1  algorithm name (used in messages and in the output file name)
#   $2  source file name, e.g. "reduce_sum_algorithm.maca"
# Returns 0 when the compiler succeeds, 1 otherwise.
compile_algorithm() {
    local algo_name="$1"
    local source_file_name="$2"
    local target_file="$BUILD_DIR/test_${algo_name,,}"   # ${var,,} lower-cases the name

    # Honour SOURCE_CODE_DIR when it is set; with the empty default the source
    # is looked up relative to the current directory, as before.
    local source_path="${SOURCE_CODE_DIR:+${SOURCE_CODE_DIR%/}/}$source_file_name"

    print_info "编译 $algo_name 算法..."

    # Make sure the build directory exists.
    mkdir -p "$BUILD_DIR"

    # COMPILER and COMPILER_FLAGS stay unquoted on purpose so that several
    # flags split into separate words; the paths are quoted so that they
    # survive embedded spaces.
    local -a compile_cmd=($COMPILER $COMPILER_FLAGS "-I$HEADER_DIR" "$source_path" -o "$target_file")

    print_info "执行: ${compile_cmd[*]}"

    if "${compile_cmd[@]}"; then
        print_success "$algo_name 编译完成!"
        echo ""
        echo "运行测试:"
        echo "  ./$target_file [correctness|performance|all]"
        return 0
    else
        print_error "$algo_name 编译失败!"
        return 1
    fi
}
|
||||
|
||||
# 显示编译配置信息
|
||||
# Print the effective build configuration, one "  NAME: value" line per
# variable, followed by an empty line.
show_build_config() {
    print_info "编译配置:"
    # printf re-applies the format for every name/value pair.
    printf '  %s: %s\n' \
        "COMPILER" "$COMPILER" \
        "COMPILER_FLAGS" "$COMPILER_FLAGS" \
        "HEADER_DIR" "$HEADER_DIR" \
        "SOURCE_CODE_DIR" "$SOURCE_CODE_DIR" \
        "BUILD_DIR" "$BUILD_DIR"
    printf '\n'
}
|
||||
|
||||
# 运行单个测试
|
||||
# Run the compiled test program of one algorithm.
#   $1  algorithm name (lower-cased to locate $BUILD_DIR/test_<name>)
#   $2  test mode passed to the program (defaults to "all")
# Returns the program's exit status, or 1 when the executable is missing.
run_single_test() {
    local algo_name="$1"
    local test_mode="${2:-all}"
    local test_file="$BUILD_DIR/test_${algo_name,,}"

    # Bail out early when the executable has not been built.
    if [ ! -f "$test_file" ]; then
        print_error "$algo_name 测试程序不存在: $test_file"
        return 1
    fi

    print_info "运行 $algo_name 测试 (模式: $test_mode)..."
    # As the last command, its exit status becomes the function's status.
    "./$test_file" "$test_mode"
}
|
||||
|
||||
# ============================================================================
|
||||
# 主脚本逻辑
|
||||
# ============================================================================
|
||||
|
||||
# 显示帮助信息 (整合了所有选项)
|
||||
# Print the usage text covering all options; $0 expands to the script name.
show_help() {
    cat <<EOF
GPU算法竞赛统一编译和运行脚本
用法: $0 [选项]

选项:
  --help              显示帮助信息
  --build-only        仅编译所有算法,不运行测试
  --run_reduce [MODE] 编译并运行ReduceSum算法测试 (MODE: correctness|performance|all, 默认all)
  --run_sort [MODE]   编译并运行SortPair算法测试 (MODE: correctness|performance|all, 默认all)
  --run_topk [MODE]   编译并运行TopkPair算法测试 (MODE: correctness|performance|all, 默认all)

示例:
  $0                    # 编译并运行所有测试(默认行为)
  $0 --build-only       # 仅编译所有算法
  $0 --run_sort performance # 编译并运行SortPair性能测试

EOF
}
|
||||
|
||||
# 解析命令行参数
|
||||
# Command-line parsing.
RUN_MODE="run_all"            # default: compile and run every test
ALGO_TO_RUN=""                # the single algorithm selected by a --run_* option
SINGLE_ALGO_TEST_MODE="all"   # test mode used for that single algorithm

while [[ $# -gt 0 ]]; do
    case "$1" in
        --help)
            show_help
            exit 0
            ;;
        --build-only)
            RUN_MODE="build_only"
            shift
            ;;
        --run_reduce|--run_sort|--run_topk)
            RUN_MODE="run_single"
            # Map the option to the algorithm it selects.
            case "$1" in
                --run_reduce) ALGO_TO_RUN="ReduceSum" ;;
                --run_sort)   ALGO_TO_RUN="SortPair" ;;
                --run_topk)   ALGO_TO_RUN="TopkPair" ;;
            esac
            # An optional MODE follows unless the next word is another option.
            if [[ -n "$2" && "$2" != --* ]]; then
                SINGLE_ALGO_TEST_MODE="$2"
                shift
            fi
            shift
            ;;
        *)
            print_error "未知选项: $1"
            show_help
            exit 1
            ;;
    esac
done
|
||||
|
||||
# Announce what this run is going to do; single-algorithm runs name the
# selected algorithm instead of claiming that everything is run.
case "$RUN_MODE" in
    build_only) print_info "开始编译所有算法..." ;;
    run_single) print_info "开始编译并运行 ${ALGO_TO_RUN} 算法..." ;;
    *)          print_info "开始编译并运行所有算法..." ;;
esac
print_info "工作目录: $(pwd)"
print_info "编译时间: $(date '+%Y-%m-%d %H:%M:%S')"
show_build_config

# Start from a clean build directory.
if [ -d "$BUILD_DIR" ]; then
    print_info "清理现有构建目录: $BUILD_DIR"
    # "--" keeps a BUILD_DIR that starts with "-" from being parsed as an option.
    rm -rf -- "$BUILD_DIR"
fi
|
||||
|
||||
# 核心逻辑:根据 RUN_MODE 执行操作
|
||||
# Core logic: act according to RUN_MODE.
# Algorithms in build order, paired index-wise with their source files.
ALGO_NAMES=("ReduceSum" "SortPair" "TopkPair")
ALGO_SOURCES=("reduce_sum_algorithm.maca" "sort_pair_algorithm.maca" "topk_pair_algorithm.maca")

case "$RUN_MODE" in
    "build_only")
        print_info "编译所有算法..."

        # Compile every algorithm, stopping at the first failure.
        for idx in "${!ALGO_NAMES[@]}"; do
            name="${ALGO_NAMES[$idx]}"
            print_info "[$((idx + 1))/${#ALGO_NAMES[@]}] 编译${name}..."
            if ! compile_algorithm "$name" "${ALGO_SOURCES[$idx]}"; then
                print_error "${name}编译失败"
                exit 1
            fi
        done

        print_success "所有算法编译完成!"
        echo ""
        echo "可执行文件:"
        for name in "${ALGO_NAMES[@]}"; do
            echo "  $BUILD_DIR/test_${name,,} - ${name}算法测试"
        done
        echo ""
        echo "使用方法:"
        for name in "${ALGO_NAMES[@]}"; do
            echo "  ./$BUILD_DIR/test_${name,,} [correctness|performance|all]"
        done
        ;;

    "run_all")
        print_info "编译并运行所有算法测试..."

        # A compile failure aborts immediately. A failing test lets the
        # remaining algorithms run but is reflected in the final exit status.
        failed_tests=0
        for idx in "${!ALGO_NAMES[@]}"; do
            name="${ALGO_NAMES[$idx]}"
            print_info "[$((idx + 1))/${#ALGO_NAMES[@]}] ${name}..."
            if compile_algorithm "$name" "${ALGO_SOURCES[$idx]}"; then
                run_single_test "$name" "all" || failed_tests=$((failed_tests + 1))
            else
                exit 1
            fi
        done

        if [ "$failed_tests" -eq 0 ]; then
            print_success "所有测试完成!"
        else
            print_error "有 ${failed_tests} 个算法测试未通过"
            exit 1
        fi
        ;;

    "run_single")
        print_info "编译并运行 ${ALGO_TO_RUN} 测试 (模式: ${SINGLE_ALGO_TEST_MODE})..."
        # Plain assignment: `local` is only valid inside a function.
        source_file_name=""
        case "$ALGO_TO_RUN" in
            "ReduceSum") source_file_name="reduce_sum_algorithm.maca" ;;
            "SortPair") source_file_name="sort_pair_algorithm.maca" ;;
            "TopkPair") source_file_name="topk_pair_algorithm.maca" ;;
        esac

        if compile_algorithm "$ALGO_TO_RUN" "$source_file_name"; then
            run_single_test "$ALGO_TO_RUN" "$SINGLE_ALGO_TEST_MODE"
            # Propagate the test program's status explicitly.
            exit $?
        else
            exit 1
        fi
        ;;
esac
|
||||
|
|
@ -0,0 +1,275 @@
|
|||
#include "test_utils.h"
|
||||
#include "performance_utils.h"
|
||||
#include "yaml_reporter.h"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
|
||||
// ============================================================================
|
||||
// 实现标记宏 - 参赛者修改实现时请将此宏设为0
|
||||
// ============================================================================
|
||||
#ifndef USE_DEFAULT_REF_IMPL
|
||||
#define USE_DEFAULT_REF_IMPL 1 // 1=默认实现, 0=参赛者自定义实现
|
||||
#endif
|
||||
|
||||
#if USE_DEFAULT_REF_IMPL
|
||||
#include <thrust/sort.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/iterator/zip_iterator.h>
|
||||
#include <thrust/tuple.h>
|
||||
#endif
|
||||
|
||||
// ============================================================================
|
||||
// SortPair算法实现接口
|
||||
// 参赛者需要替换Thrust实现为自己的高性能kernel
|
||||
// ============================================================================
|
||||
|
||||
/// Key/value sort entry point exercised by the correctness and benchmark drivers.
///
/// When USE_DEFAULT_REF_IMPL is non-zero the work is delegated to
/// thrust::stable_sort_by_key; otherwise sort() is an empty slot for a custom kernel.
template <typename KeyType, typename ValueType>
class SortPairAlgorithm {
public:
    /// Main interface function that contestants are expected to implement.
    ///
    /// @param d_keys_in    keys to sort (left unmodified by the reference path)
    /// @param d_keys_out   receives the sorted keys
    /// @param d_values_in  values paired with d_keys_in
    /// @param d_values_out receives the values reordered along with their keys
    /// @param num_items    number of key/value pairs
    /// @param descending   true sorts with thrust::greater, false with thrust::less
    void sort(const KeyType* d_keys_in, KeyType* d_keys_out,
              const ValueType* d_values_in, ValueType* d_values_out,
              int num_items, bool descending) {
#if USE_DEFAULT_REF_IMPL
        // ---- Default reference implementation ----
        // Copy the inputs into the output buffers, then sort those in place.
        const auto key_bytes = num_items * sizeof(KeyType);
        const auto value_bytes = num_items * sizeof(ValueType);
        MACA_CHECK(mcMemcpy(d_keys_out, d_keys_in, key_bytes, mcMemcpyDeviceToDevice));
        MACA_CHECK(mcMemcpy(d_values_out, d_values_in, value_bytes, mcMemcpyDeviceToDevice));

        const auto keys_first = thrust::device_pointer_cast(d_keys_out);
        const auto keys_last = keys_first + num_items;
        const auto values_first = thrust::device_pointer_cast(d_values_out);

        // The two comparators are distinct types, hence two separate calls.
        if (!descending) {
            thrust::stable_sort_by_key(thrust::device, keys_first, keys_last, values_first, thrust::less<KeyType>());
        } else {
            thrust::stable_sort_by_key(thrust::device, keys_first, keys_last, values_first, thrust::greater<KeyType>());
        }
#else
        // ---- Contestant implementation area ----
        // TODO: implement a high-performance sort here.
        //
        // Example: one or more custom kernels may be launched, e.g.
        // preprocessKernel<<<grid, block>>>(d_keys_in, d_values_in, num_items);
        // mainSortKernel<<<grid, block>>>(d_keys_out, d_values_out, num_items, descending);
        // postprocessKernel<<<grid, block>>>(d_keys_out, d_values_out, num_items);
#endif
    }

    /// Reports which of the two branches was compiled in.
    static const char* getImplementationStatus() {
#if !USE_DEFAULT_REF_IMPL
        return "CUSTOM_IMPL";
#else
        return "DEFAULT_REF_IMPL";
#endif
    }

private:
    // Contestants may add helper functions and member variables here,
    // e.g. temporary buffers, additional kernels, streams.
};
|
||||
|
||||
// ============================================================================
|
||||
// 测试和性能评估
|
||||
// ============================================================================
|
||||
|
||||
bool testCorrectness() {
|
||||
std::cout << "SortPair 正确性测试..." << std::endl;
|
||||
TestDataGenerator generator;
|
||||
SortPairAlgorithm<float, uint32_t> algorithm;
|
||||
|
||||
// 测试小规模数据
|
||||
int size = 10000;
|
||||
auto keys = generator.generateRandomFloats(size);
|
||||
auto values = generator.generateRandomUint32(size);
|
||||
|
||||
// 分配GPU内存
|
||||
float *d_keys_in, *d_keys_out;
|
||||
uint32_t *d_values_in, *d_values_out;
|
||||
|
||||
MACA_CHECK(mcMalloc(&d_keys_in, size * sizeof(float)));
|
||||
MACA_CHECK(mcMalloc(&d_keys_out, size * sizeof(float)));
|
||||
MACA_CHECK(mcMalloc(&d_values_in, size * sizeof(uint32_t)));
|
||||
MACA_CHECK(mcMalloc(&d_values_out, size * sizeof(uint32_t)));
|
||||
|
||||
MACA_CHECK(mcMemcpy(d_keys_in, keys.data(), size * sizeof(float), mcMemcpyHostToDevice));
|
||||
MACA_CHECK(mcMemcpy(d_values_in, values.data(), size * sizeof(uint32_t), mcMemcpyHostToDevice));
|
||||
|
||||
// 测试升序和降序
|
||||
bool allPassed = true;
|
||||
for (bool descending : {false, true}) {
|
||||
std::cout << " " << (descending ? "降序" : "升序") << " 测试..." << std::endl;
|
||||
|
||||
// CPU参考结果
|
||||
auto cpu_keys = keys;
|
||||
auto cpu_values = values;
|
||||
cpuSortPair(cpu_keys, cpu_values, descending);
|
||||
|
||||
// GPU算法结果
|
||||
algorithm.sort(d_keys_in, d_keys_out, d_values_in, d_values_out, size, descending);
|
||||
|
||||
// 获取结果
|
||||
std::vector<float> gpu_keys(size);
|
||||
std::vector<uint32_t> gpu_values(size);
|
||||
MACA_CHECK(mcMemcpy(gpu_keys.data(), d_keys_out, size * sizeof(float), mcMemcpyDeviceToHost));
|
||||
MACA_CHECK(mcMemcpy(gpu_values.data(), d_values_out, size * sizeof(uint32_t), mcMemcpyDeviceToHost));
|
||||
|
||||
// 验证结果
|
||||
bool keysMatch = compareArrays(cpu_keys, gpu_keys, 1e-5);
|
||||
bool valuesMatch = compareArrays(cpu_values, gpu_values);
|
||||
|
||||
if (!keysMatch || !valuesMatch) {
|
||||
std::cout << " 失败: 结果不匹配" << std::endl;
|
||||
allPassed = false;
|
||||
} else {
|
||||
std::cout << " 通过" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// 清理内存
|
||||
mcFree(d_keys_in);
|
||||
mcFree(d_keys_out);
|
||||
mcFree(d_values_in);
|
||||
mcFree(d_values_out);
|
||||
|
||||
return allPassed;
|
||||
}
|
||||
|
||||
void benchmarkPerformance() {
|
||||
PerformanceDisplay::printSortPairHeader();
|
||||
|
||||
TestDataGenerator generator;
|
||||
PerformanceMeter meter;
|
||||
SortPairAlgorithm<float, uint32_t> algorithm;
|
||||
|
||||
const int WARMUP_ITERATIONS = 5;
|
||||
const int BENCHMARK_ITERATIONS = 10;
|
||||
|
||||
// 用于YAML报告的数据收集
|
||||
std::vector<std::map<std::string, std::string>> perf_data;
|
||||
|
||||
for (int i = 0; i < NUM_TEST_SIZES; i++) {
|
||||
int size = TEST_SIZES[i];
|
||||
|
||||
// 生成测试数据
|
||||
auto keys = generator.generateRandomFloats(size);
|
||||
auto values = generator.generateRandomUint32(size);
|
||||
|
||||
// 分配GPU内存
|
||||
float *d_keys_in, *d_keys_out;
|
||||
uint32_t *d_values_in, *d_values_out;
|
||||
|
||||
MACA_CHECK(mcMalloc(&d_keys_in, size * sizeof(float)));
|
||||
MACA_CHECK(mcMalloc(&d_keys_out, size * sizeof(float)));
|
||||
MACA_CHECK(mcMalloc(&d_values_in, size * sizeof(uint32_t)));
|
||||
MACA_CHECK(mcMalloc(&d_values_out, size * sizeof(uint32_t)));
|
||||
|
||||
MACA_CHECK(mcMemcpy(d_keys_in, keys.data(), size * sizeof(float), mcMemcpyHostToDevice));
|
||||
MACA_CHECK(mcMemcpy(d_values_in, values.data(), size * sizeof(uint32_t), mcMemcpyHostToDevice));
|
||||
|
||||
float asc_time = 0, desc_time = 0;
|
||||
|
||||
// 测试升序和降序
|
||||
for (bool descending : {false, true}) {
|
||||
// Warmup阶段
|
||||
for (int iter = 0; iter < WARMUP_ITERATIONS; iter++) {
|
||||
algorithm.sort(d_keys_in, d_keys_out, d_values_in, d_values_out, size, descending);
|
||||
}
|
||||
|
||||
// 正式测试阶段
|
||||
float total_time = 0;
|
||||
for (int iter = 0; iter < BENCHMARK_ITERATIONS; iter++) {
|
||||
meter.startTiming();
|
||||
algorithm.sort(d_keys_in, d_keys_out, d_values_in, d_values_out, size, descending);
|
||||
total_time += meter.stopTiming();
|
||||
}
|
||||
|
||||
float avg_time = total_time / BENCHMARK_ITERATIONS;
|
||||
if (descending) {
|
||||
desc_time = avg_time;
|
||||
} else {
|
||||
asc_time = avg_time;
|
||||
}
|
||||
}
|
||||
|
||||
// 计算性能指标
|
||||
auto asc_metrics = PerformanceCalculator::calculateSortPair(size, asc_time);
|
||||
auto desc_metrics = PerformanceCalculator::calculateSortPair(size, desc_time);
|
||||
|
||||
// 显示性能数据
|
||||
PerformanceDisplay::printSortPairData(size, asc_time, desc_time, asc_metrics, desc_metrics);
|
||||
|
||||
// 收集YAML报告数据
|
||||
auto entry = YAMLPerformanceReporter::createEntry();
|
||||
entry["data_size"] = std::to_string(size);
|
||||
entry["asc_time_ms"] = std::to_string(asc_time);
|
||||
entry["desc_time_ms"] = std::to_string(desc_time);
|
||||
entry["asc_throughput_gps"] = std::to_string(asc_metrics.throughput_gps);
|
||||
entry["desc_throughput_gps"] = std::to_string(desc_metrics.throughput_gps);
|
||||
entry["key_type"] = "float";
|
||||
entry["value_type"] = "uint32_t";
|
||||
perf_data.push_back(entry);
|
||||
|
||||
// 清理内存
|
||||
mcFree(d_keys_in);
|
||||
mcFree(d_keys_out);
|
||||
mcFree(d_values_in);
|
||||
mcFree(d_values_out);
|
||||
}
|
||||
|
||||
// 生成YAML性能报告
|
||||
YAMLPerformanceReporter::generateSortPairYAML(perf_data, "sort_pair_performance.yaml");
|
||||
PerformanceDisplay::printSavedMessage("sort_pair_performance.yaml");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 主函数
|
||||
// ============================================================================
|
||||
int main(int argc, char* argv[]) {
|
||||
std::cout << "=== SortPair 算法测试 ===" << std::endl;
|
||||
|
||||
// 检查参数
|
||||
std::string mode = "all";
|
||||
if (argc > 1) {
|
||||
mode = argv[1];
|
||||
}
|
||||
|
||||
bool correctness_passed = true;
|
||||
bool performance_completed = true;
|
||||
|
||||
try {
|
||||
if (mode == "correctness" || mode == "all") {
|
||||
correctness_passed = testCorrectness();
|
||||
}
|
||||
|
||||
if (mode == "performance" || mode == "all") {
|
||||
if (correctness_passed || mode == "performance") {
|
||||
benchmarkPerformance();
|
||||
} else {
|
||||
std::cout << "跳过性能测试,因为正确性测试未通过" << std::endl;
|
||||
performance_completed = false;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "\n=== 测试完成 ===" << std::endl;
|
||||
std::cout << "实现状态: " << SortPairAlgorithm<float, uint32_t>::getImplementationStatus() << std::endl;
|
||||
if (mode == "all") {
|
||||
std::cout << "正确性: " << (correctness_passed ? "通过" : "失败") << std::endl;
|
||||
std::cout << "性能测试: " << (performance_completed ? "完成" : "跳过") << std::endl;
|
||||
}
|
||||
|
||||
return correctness_passed ? 0 : 1;
|
||||
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "测试出错: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,317 @@
|
|||
#include "test_utils.h"
|
||||
#include "performance_utils.h"
|
||||
#include "yaml_reporter.h"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <chrono>
|
||||
|
||||
// ============================================================================
|
||||
// 实现标记宏 - 参赛者修改实现时请将此宏设为0
|
||||
// ============================================================================
|
||||
#ifndef USE_DEFAULT_REF_IMPL
|
||||
#define USE_DEFAULT_REF_IMPL 1 // 1=默认实现, 0=参赛者自定义实现
|
||||
#endif
|
||||
|
||||
#if USE_DEFAULT_REF_IMPL
|
||||
#include <thrust/sort.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/iterator/zip_iterator.h>
|
||||
#include <thrust/tuple.h>
|
||||
#include <thrust/copy.h>
|
||||
#endif
|
||||
|
||||
// k values used by the TopK tests, and how many of them there are.
static constexpr int TOPK_VALUES[] = {32, 50, 100, 256, 1024};
static constexpr int NUM_TOPK_VALUES = static_cast<int>(sizeof(TOPK_VALUES) / sizeof(*TOPK_VALUES));
|
||||
|
||||
// ============================================================================
|
||||
// TopkPair算法实现接口
|
||||
// 参赛者需要替换Thrust实现为自己的高性能kernel
|
||||
// ============================================================================
|
||||
|
||||
/// Top-k key/value selection entry point exercised by the test drivers.
///
/// When USE_DEFAULT_REF_IMPL is non-zero the reference path fully sorts a copy
/// of the input with thrust::stable_sort_by_key and copies out the leading
/// pairs; otherwise topk() is an empty slot for a custom kernel.
template <typename KeyType, typename ValueType>
class TopkPairAlgorithm {
public:
    /// Main interface function that contestants are expected to implement.
    ///
    /// @param d_keys_in    input keys (left unmodified by the reference path)
    /// @param d_keys_out   receives the first k keys of the sorted order
    /// @param d_values_in  values paired with d_keys_in
    /// @param d_values_out receives the values belonging to d_keys_out
    /// @param num_items    number of input pairs
    /// @param k            number of pairs to return
    /// @param descending   true sorts with thrust::greater, false with thrust::less
    ///
    /// In the reference path a non-positive num_items or k is a no-op, and at
    /// most num_items pairs are written when k exceeds num_items.
    void topk(const KeyType* d_keys_in, KeyType* d_keys_out,
              const ValueType* d_values_in, ValueType* d_values_out,
              int num_items, int k, bool descending) {

#if !USE_DEFAULT_REF_IMPL
        // ========================================
        // Contestant implementation area
        // ========================================

        // TODO: implement a high-performance TopK algorithm here.

        // Example: several custom kernels may be launched, e.g.
        // TopkKernel1<<<grid, block>>>(d_keys_in, d_values_in, temp_results, num_items, k);
        // TopkKernel2<<<grid, block>>>(temp_results, d_keys_out, d_values_out, k, descending);
#else
        // ========================================
        // Default reference implementation
        // ========================================

        // Nothing to select. This also keeps a negative count from being
        // converted to a huge unsigned byte size below.
        if (num_items <= 0 || k <= 0) {
            return;
        }

        // Never copy more pairs than were sorted; k > num_items would
        // otherwise read past the end of the temporary buffers.
        const int out_count = (k < num_items) ? k : num_items;

        // Work on scratch copies so the inputs stay untouched.
        KeyType* temp_keys;
        ValueType* temp_values;
        MACA_CHECK(mcMalloc(&temp_keys, num_items * sizeof(KeyType)));
        MACA_CHECK(mcMalloc(&temp_values, num_items * sizeof(ValueType)));

        MACA_CHECK(mcMemcpy(temp_keys, d_keys_in, num_items * sizeof(KeyType), mcMemcpyDeviceToDevice));
        MACA_CHECK(mcMemcpy(temp_values, d_values_in, num_items * sizeof(ValueType), mcMemcpyDeviceToDevice));

        auto key_ptr = thrust::device_pointer_cast(temp_keys);
        auto value_ptr = thrust::device_pointer_cast(temp_values);

        // greater and less are distinct types, so each needs its own call.
        if (descending) {
            thrust::stable_sort_by_key(thrust::device, key_ptr, key_ptr + num_items, value_ptr, thrust::greater<KeyType>());
        } else {
            thrust::stable_sort_by_key(thrust::device, key_ptr, key_ptr + num_items, value_ptr, thrust::less<KeyType>());
        }

        // The leading pairs of the sorted scratch buffers are the result.
        MACA_CHECK(mcMemcpy(d_keys_out, temp_keys, out_count * sizeof(KeyType), mcMemcpyDeviceToDevice));
        MACA_CHECK(mcMemcpy(d_values_out, temp_values, out_count * sizeof(ValueType), mcMemcpyDeviceToDevice));

        mcFree(temp_keys);
        mcFree(temp_values);
#endif
    }

    /// Reports which of the two branches was compiled in.
    static const char* getImplementationStatus() {
#if USE_DEFAULT_REF_IMPL
        return "DEFAULT_REF_IMPL";
#else
        return "CUSTOM_IMPL";
#endif
    }

private:
    // Contestants may add helper functions and member variables here,
    // e.g. tile sizes, temporary buffers, multi-stream handling.
};
|
||||
|
||||
// ============================================================================
|
||||
// 测试和性能评估
|
||||
// ============================================================================
|
||||
|
||||
bool testCorrectness() {
|
||||
std::cout << "TopkPair 正确性测试..." << std::endl;
|
||||
TestDataGenerator generator;
|
||||
TopkPairAlgorithm<float, uint32_t> algorithm;
|
||||
|
||||
int size = 10000;
|
||||
auto keys = generator.generateRandomFloats(size);
|
||||
auto values = generator.generateRandomUint32(size);
|
||||
|
||||
// 分配GPU内存
|
||||
float *d_keys_in, *d_keys_out;
|
||||
uint32_t *d_values_in, *d_values_out;
|
||||
|
||||
MACA_CHECK(mcMalloc(&d_keys_in, size * sizeof(float)));
|
||||
MACA_CHECK(mcMalloc(&d_values_in, size * sizeof(uint32_t)));
|
||||
|
||||
MACA_CHECK(mcMemcpy(d_keys_in, keys.data(), size * sizeof(float), mcMemcpyHostToDevice));
|
||||
MACA_CHECK(mcMemcpy(d_values_in, values.data(), size * sizeof(uint32_t), mcMemcpyHostToDevice));
|
||||
|
||||
bool allPassed = true;
|
||||
|
||||
// 测试不同k值
|
||||
for (int ki = 0; ki < NUM_TOPK_VALUES && ki < 4; ki++) { // 限制测试范围
|
||||
int k = TOPK_VALUES[ki];
|
||||
if (k > size) continue;
|
||||
|
||||
std::cout << " 测试 k=" << k << std::endl;
|
||||
|
||||
MACA_CHECK(mcMalloc(&d_keys_out, k * sizeof(float)));
|
||||
MACA_CHECK(mcMalloc(&d_values_out, k * sizeof(uint32_t)));
|
||||
|
||||
for (bool descending : {false, true}) {
|
||||
std::cout << " " << (descending ? "降序" : "升序") << " TopK..." << std::endl;
|
||||
|
||||
// CPU参考结果
|
||||
std::vector<float> cpu_keys_out;
|
||||
std::vector<uint32_t> cpu_values_out;
|
||||
cpuTopkPair(keys, values, cpu_keys_out, cpu_values_out, k, descending);
|
||||
|
||||
// GPU算法结果
|
||||
algorithm.topk(d_keys_in, d_keys_out, d_values_in, d_values_out, size, k, descending);
|
||||
|
||||
// 获取结果
|
||||
std::vector<float> gpu_keys_out(k);
|
||||
std::vector<uint32_t> gpu_values_out(k);
|
||||
MACA_CHECK(mcMemcpy(gpu_keys_out.data(), d_keys_out, k * sizeof(float), mcMemcpyDeviceToHost));
|
||||
MACA_CHECK(mcMemcpy(gpu_values_out.data(), d_values_out, k * sizeof(uint32_t), mcMemcpyDeviceToHost));
|
||||
|
||||
// 验证结果
|
||||
bool keysMatch = compareArrays(cpu_keys_out, gpu_keys_out, 1e-5);
|
||||
bool valuesMatch = compareArrays(cpu_values_out, gpu_values_out);
|
||||
|
||||
if (!keysMatch || !valuesMatch) {
|
||||
std::cout << " 失败: 结果不匹配" << std::endl;
|
||||
allPassed = false;
|
||||
} else {
|
||||
std::cout << " 通过" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
mcFree(d_keys_out);
|
||||
mcFree(d_values_out);
|
||||
}
|
||||
|
||||
// 清理内存
|
||||
mcFree(d_keys_in);
|
||||
mcFree(d_values_in);
|
||||
|
||||
return allPassed;
|
||||
}
|
||||
|
||||
void benchmarkPerformance() {
|
||||
std::cout << "\nTopkPair 性能测试..." << std::endl;
|
||||
std::cout << "数据类型: <float, uint32_t>" << std::endl;
|
||||
std::cout << "计算公式:" << std::endl;
|
||||
std::cout << " 吞吐量 = 元素数 / 时间(s) / 1e9 (G/s)" << std::endl;
|
||||
|
||||
TestDataGenerator generator;
|
||||
PerformanceMeter meter;
|
||||
TopkPairAlgorithm<float, uint32_t> algorithm;
|
||||
|
||||
const int WARMUP_ITERATIONS = 5;
|
||||
const int BENCHMARK_ITERATIONS = 10;
|
||||
|
||||
// 用于YAML报告的数据收集
|
||||
std::vector<std::map<std::string, std::string>> perf_data;
|
||||
|
||||
// 针对不同数据规模测试
|
||||
for (int size_idx = 0; size_idx < NUM_TEST_SIZES; size_idx++) {
|
||||
int size = TEST_SIZES[size_idx];
|
||||
std::cout << "\n数据规模: " << size << std::endl;
|
||||
std::cout << std::setw(8) << "k值" << std::setw(15) << "升序(ms)" << std::setw(15) << "降序(ms)"
|
||||
<< std::setw(16) << "升序(G/s)" << std::setw(16) << "降序(G/s)" << std::endl;
|
||||
std::cout << std::string(74, '-') << std::endl;
|
||||
|
||||
auto keys = generator.generateRandomFloats(size);
|
||||
auto values = generator.generateRandomUint32(size);
|
||||
|
||||
// 分配GPU内存
|
||||
float *d_keys_in;
|
||||
uint32_t *d_values_in;
|
||||
|
||||
MACA_CHECK(mcMalloc(&d_keys_in, size * sizeof(float)));
|
||||
MACA_CHECK(mcMalloc(&d_values_in, size * sizeof(uint32_t)));
|
||||
|
||||
MACA_CHECK(mcMemcpy(d_keys_in, keys.data(), size * sizeof(float), mcMemcpyHostToDevice));
|
||||
MACA_CHECK(mcMemcpy(d_values_in, values.data(), size * sizeof(uint32_t), mcMemcpyHostToDevice));
|
||||
|
||||
for (int ki = 0; ki < NUM_TOPK_VALUES; ki++) {
|
||||
int k = TOPK_VALUES[ki];
|
||||
if (k > size) continue;
|
||||
|
||||
float *d_keys_out;
|
||||
uint32_t *d_values_out;
|
||||
MACA_CHECK(mcMalloc(&d_keys_out, k * sizeof(float)));
|
||||
MACA_CHECK(mcMalloc(&d_values_out, k * sizeof(uint32_t)));
|
||||
|
||||
float asc_time = 0, desc_time = 0;
|
||||
|
||||
for (bool descending : {false, true}) {
|
||||
// Warmup阶段
|
||||
for (int iter = 0; iter < WARMUP_ITERATIONS; iter++) {
|
||||
algorithm.topk(d_keys_in, d_keys_out, d_values_in, d_values_out, size, k, descending);
|
||||
}
|
||||
|
||||
// 正式测试阶段
|
||||
float total_time = 0;
|
||||
for (int iter = 0; iter < BENCHMARK_ITERATIONS; iter++) {
|
||||
meter.startTiming();
|
||||
algorithm.topk(d_keys_in, d_keys_out, d_values_in, d_values_out, size, k, descending);
|
||||
total_time += meter.stopTiming();
|
||||
}
|
||||
|
||||
float avg_time = total_time / BENCHMARK_ITERATIONS;
|
||||
if (descending) {
|
||||
desc_time = avg_time;
|
||||
} else {
|
||||
asc_time = avg_time;
|
||||
}
|
||||
}
|
||||
|
||||
// 计算性能指标
|
||||
auto asc_metrics = PerformanceCalculator::calculateTopkPair(size, k, asc_time);
|
||||
auto desc_metrics = PerformanceCalculator::calculateTopkPair(size, k, desc_time);
|
||||
|
||||
// 显示性能数据
|
||||
PerformanceDisplay::printTopkPairData(k, asc_time, desc_time, asc_metrics, desc_metrics);
|
||||
|
||||
// 收集YAML报告数据
|
||||
auto entry = YAMLPerformanceReporter::createEntry();
|
||||
entry["data_size"] = std::to_string(size);
|
||||
entry["k_value"] = std::to_string(k);
|
||||
entry["asc_time_ms"] = std::to_string(asc_time);
|
||||
entry["desc_time_ms"] = std::to_string(desc_time);
|
||||
entry["asc_throughput_gps"] = std::to_string(asc_metrics.throughput_gps);
|
||||
entry["desc_throughput_gps"] = std::to_string(desc_metrics.throughput_gps);
|
||||
entry["key_type"] = "float";
|
||||
entry["value_type"] = "uint32_t";
|
||||
perf_data.push_back(entry);
|
||||
|
||||
mcFree(d_keys_out);
|
||||
mcFree(d_values_out);
|
||||
}
|
||||
|
||||
mcFree(d_keys_in);
|
||||
mcFree(d_values_in);
|
||||
}
|
||||
|
||||
// 生成YAML性能报告
|
||||
YAMLPerformanceReporter::generateTopkPairYAML(perf_data, "topk_pair_performance.yaml");
|
||||
PerformanceDisplay::printSavedMessage("topk_pair_performance.yaml");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 主函数
|
||||
// ============================================================================
|
||||
int main(int argc, char* argv[]) {
|
||||
std::cout << "=== TopkPair 算法测试 ===" << std::endl;
|
||||
|
||||
// 检查参数
|
||||
std::string mode = "all";
|
||||
if (argc > 1) {
|
||||
mode = argv[1];
|
||||
}
|
||||
|
||||
bool correctness_passed = true;
|
||||
bool performance_completed = true;
|
||||
|
||||
try {
|
||||
if (mode == "correctness" || mode == "all") {
|
||||
correctness_passed = testCorrectness();
|
||||
}
|
||||
|
||||
if (mode == "performance" || mode == "all") {
|
||||
if (correctness_passed || mode == "performance") {
|
||||
benchmarkPerformance();
|
||||
} else {
|
||||
std::cout << "跳过性能测试,因为正确性测试未通过" << std::endl;
|
||||
performance_completed = false;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "\n=== 测试完成 ===" << std::endl;
|
||||
std::cout << "实现状态: " << TopkPairAlgorithm<float, uint32_t>::getImplementationStatus() << std::endl;
|
||||
if (mode == "all") {
|
||||
std::cout << "正确性: " << (correctness_passed ? "通过" : "失败") << std::endl;
|
||||
std::cout << "性能测试: " << (performance_completed ? "完成" : "跳过") << std::endl;
|
||||
}
|
||||
|
||||
return correctness_passed ? 0 : 1;
|
||||
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "测试出错: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,149 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# GPU高性能并行计算算法优化竞赛 - 统一编译脚本
|
||||
# 调用各个独立的build_and_run脚本来编译所有算法
|
||||
|
||||
# 加载公共配置
|
||||
source "$(dirname "$0")/build_common.sh"
|
||||
|
||||
# Print the usage text of the unified build script to stdout.
show_help() {
    cat <<EOF
GPU算法竞赛统一编译和运行脚本
用法: $0 [选项]

选项:
 --help 显示帮助信息
 --build-only 仅编译所有算法,不运行测试
 --run_reduce 编译并运行ReduceSum算法测试
 --run_sort 编译并运行SortPair算法测试
 --run_topk 编译并运行TopkPair算法测试

示例:
 $0 # 编译并运行所有测试(默认行为)
 $0 --build-only # 仅编译所有算法
 $0 --run_sort # 编译并运行SortPair测试

单独算法脚本:
 ./build_and_run_reduce_sum.sh # 仅处理ReduceSum
 ./build_and_run_sort_pair.sh # 仅处理SortPair
 ./build_and_run_topk_pair.sh # 仅处理TopkPair

EOF
}
|
||||
|
||||
# Parse command-line options; when several mode options are given the last wins.
RUN_MODE="run_all"  # default: build and run every algorithm

while [[ $# -gt 0 ]]; do
    case "$1" in
        --help)
            show_help
            exit 0
            ;;
        --build-only) RUN_MODE="build_only" ;;
        --run_reduce) RUN_MODE="run_reduce" ;;
        --run_sort)   RUN_MODE="run_sort" ;;
        --run_topk)   RUN_MODE="run_topk" ;;
        *)
            print_error "未知选项: $1"
            show_help
            exit 1
            ;;
    esac
    shift
done

if [ "$RUN_MODE" = "build_only" ]; then
    print_info "开始编译所有算法..."
else
    print_info "开始编译并运行所有算法..."
fi
print_info "工作目录: $(pwd)"
print_info "编译时间: $(date '+%Y-%m-%d %H:%M:%S')"
show_build_config

# Start from an empty build directory.
if [ -d "$BUILD_DIR" ]; then
    print_info "清理现有构建目录: $BUILD_DIR"
    rm -rf "$BUILD_DIR"
fi

# "Name:script" pairs in the order the algorithms are processed.
ALGORITHMS=(
    "ReduceSum:./build_and_run_reduce_sum.sh"
    "SortPair:./build_and_run_sort_pair.sh"
    "TopkPair:./build_and_run_topk_pair.sh"
)

case "$RUN_MODE" in
    "build_only")
        print_info "编译所有算法..."

        step=0
        for entry in "${ALGORITHMS[@]}"; do
            step=$((step + 1))
            algo="${entry%%:*}"
            print_info "[$step/${#ALGORITHMS[@]}] 编译${algo}..."
            # Stop at the first algorithm that fails to build.
            if ! "${entry#*:}" --build-only; then
                print_error "${algo}编译失败"
                exit 1
            fi
        done

        print_success "所有算法编译完成!"
        echo ""
        echo "可执行文件:"
        echo " $BUILD_DIR/test_reducesum - ReduceSum算法测试"
        echo " $BUILD_DIR/test_sortpair - SortPair算法测试"
        echo " $BUILD_DIR/test_topkpair - TopkPair算法测试"
        echo ""
        echo "使用方法:"
        echo " ./$BUILD_DIR/test_reducesum [correctness|performance|all]"
        echo " ./$BUILD_DIR/test_sortpair [correctness|performance|all]"
        echo " ./$BUILD_DIR/test_topkpair [correctness|performance|all]"
        ;;

    "run_all")
        print_info "编译并运行所有算法测试..."

        # Every algorithm is still attempted, but a failure is remembered so the
        # script no longer reports success and exit status 0 after a failed step.
        failed=0
        step=0
        for entry in "${ALGORITHMS[@]}"; do
            step=$((step + 1))
            print_info "[$step/${#ALGORITHMS[@]}] ${entry%%:*}..."
            "${entry#*:}" --run all || failed=1
        done

        if [ "$failed" -ne 0 ]; then
            print_error "部分算法编译或测试失败"
            exit 1
        fi
        print_success "所有测试完成!"
        ;;

    "run_reduce")
        print_info "编译并运行ReduceSum测试..."
        ./build_and_run_reduce_sum.sh --run all
        ;;

    "run_sort")
        print_info "编译并运行SortPair测试..."
        ./build_and_run_sort_pair.sh --run all
        ;;

    "run_topk")
        print_info "编译并运行TopkPair测试..."
        ./build_and_run_topk_pair.sh --run all
        ;;
esac
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# ReduceSum算法编译和运行脚本
|
||||
# 用于参赛者只选择ReduceSum题目时使用
|
||||
|
||||
# 加载公共配置
|
||||
source "$(dirname "$0")/build_common.sh"
|
||||
|
||||
# Print the usage text of the ReduceSum build-and-run script to stdout.
show_help() {
    cat <<EOF
ReduceSum算法编译和运行脚本
用法: $0 [选项]

选项:
 --help 显示此帮助信息
 --build-only 仅编译,不运行测试
 --run MODE 编译并运行测试 (MODE: correctness|performance|all)

示例:
 $0 # 编译并运行所有测试(默认行为)
 $0 --build-only # 仅编译
 $0 --run correctness # 编译并运行正确性测试

EOF
}
|
||||
|
||||
# Parse command-line options.
# RUN_MODE holds the test mode to run after building; empty means build only.
RUN_MODE="all"

while [[ $# -gt 0 ]]; do
    case "$1" in
        --help)
            show_help
            exit 0
            ;;
        --build-only)
            RUN_MODE=""
            shift
            ;;
        --run)
            # `shift 2` fails without shifting when --run is the last argument,
            # which previously left this loop spinning forever.
            if [[ $# -lt 2 ]]; then
                print_error "--run 需要指定 MODE (correctness|performance|all)"
                show_help
                exit 1
            fi
            RUN_MODE="$2"
            shift 2
            ;;
        *)
            print_error "未知选项: $1"
            show_help
            exit 1
            ;;
    esac
done

# Build ReduceSum; run the tests afterwards only when a mode was requested.
if ! compile_algorithm "ReduceSum" "reduce_sum_algorithm.maca"; then
    exit 1
fi

if [ -n "$RUN_MODE" ]; then
    echo ""
    run_single_test "ReduceSum" "$RUN_MODE"
fi
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# SortPair算法编译和运行脚本
|
||||
# 用于参赛者只选择SortPair题目时使用
|
||||
|
||||
# 加载公共配置
|
||||
source "$(dirname "$0")/build_common.sh"
|
||||
|
||||
# Print the usage text of the SortPair build-and-run script to stdout.
show_help() {
    cat <<EOF
SortPair算法编译和运行脚本
用法: $0 [选项]

选项:
 --help 显示此帮助信息
 --build-only 仅编译,不运行测试
 --run MODE 编译并运行测试 (MODE: correctness|performance|all)

示例:
 $0 # 编译并运行所有测试(默认行为)
 $0 --build-only # 仅编译
 $0 --run correctness # 编译并运行正确性测试

EOF
}
|
||||
|
||||
# Parse command-line options.
# RUN_MODE holds the test mode to run after building; empty means build only.
RUN_MODE="all"

while [[ $# -gt 0 ]]; do
    case "$1" in
        --help)
            show_help
            exit 0
            ;;
        --build-only)
            RUN_MODE=""
            shift
            ;;
        --run)
            # `shift 2` fails without shifting when --run is the last argument,
            # which previously left this loop spinning forever.
            if [[ $# -lt 2 ]]; then
                print_error "--run 需要指定 MODE (correctness|performance|all)"
                show_help
                exit 1
            fi
            RUN_MODE="$2"
            shift 2
            ;;
        *)
            print_error "未知选项: $1"
            show_help
            exit 1
            ;;
    esac
done

# Build SortPair; run the tests afterwards only when a mode was requested.
if ! compile_algorithm "SortPair" "sort_pair_algorithm.maca"; then
    exit 1
fi

if [ -n "$RUN_MODE" ]; then
    echo ""
    run_single_test "SortPair" "$RUN_MODE"
fi
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# TopkPair算法编译和运行脚本
|
||||
# 用于参赛者只选择TopkPair题目时使用
|
||||
|
||||
# 加载公共配置
|
||||
source "$(dirname "$0")/build_common.sh"
|
||||
|
||||
# Print the usage text of the TopkPair build-and-run script to stdout.
show_help() {
    cat <<EOF
TopkPair算法编译和运行脚本
用法: $0 [选项]

选项:
 --help 显示此帮助信息
 --build-only 仅编译,不运行测试
 --run MODE 编译并运行测试 (MODE: correctness|performance|all)

示例:
 $0 # 编译并运行所有测试(默认行为)
 $0 --build-only # 仅编译
 $0 --run correctness # 编译并运行正确性测试

EOF
}
|
||||
|
||||
# Parse command-line options.
# RUN_MODE holds the test mode to run after building; empty means build only.
RUN_MODE="all"

while [[ $# -gt 0 ]]; do
    case "$1" in
        --help)
            show_help
            exit 0
            ;;
        --build-only)
            RUN_MODE=""
            shift
            ;;
        --run)
            # `shift 2` fails without shifting when --run is the last argument,
            # which previously left this loop spinning forever.
            if [[ $# -lt 2 ]]; then
                print_error "--run 需要指定 MODE (correctness|performance|all)"
                show_help
                exit 1
            fi
            RUN_MODE="$2"
            shift 2
            ;;
        *)
            print_error "未知选项: $1"
            show_help
            exit 1
            ;;
    esac
done

# Build TopkPair; run the tests afterwards only when a mode was requested.
if ! compile_algorithm "TopkPair" "topk_pair_algorithm.maca"; then
    exit 1
fi

if [ -n "$RUN_MODE" ]; then
    echo ""
    run_single_test "TopkPair" "$RUN_MODE"
fi
|
||||
|
|
@ -1,92 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# GPU算法竞赛公共编译配置
|
||||
# 被各个独立编译脚本调用
|
||||
|
||||
# ANSI color sequences, stored unexpanded (backslash form) and interpreted
# later when the print_* helpers echo them with -e.
RED="\033[0;31m"
GREEN="\033[0;32m"
BLUE="\033[0;34m"
YELLOW="\033[0;33m"
NC="\033[0m" # reset to the default color
|
||||
|
||||
# Logging helpers.
# print_info MESSAGE: write MESSAGE with a blue [INFO] tag.
print_info() {
    local message="$1"
    printf '%b\n' "${BLUE}[INFO]${NC} ${message}"
}
|
||||
|
||||
# print_success MESSAGE: write MESSAGE with a green [SUCCESS] tag.
print_success() {
    local message="$1"
    printf '%b\n' "${GREEN}[SUCCESS]${NC} ${message}"
}
|
||||
|
||||
# print_error MESSAGE: write MESSAGE with a red [ERROR] tag.
# Errors go to stderr so they stay visible when stdout is piped or captured.
print_error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
}
|
||||
|
||||
# print_warning MESSAGE: write MESSAGE with a yellow [WARNING] tag.
print_warning() {
    local message="$1"
    printf '%b\n' "${YELLOW}[WARNING]${NC} ${message}"
}
|
||||
|
||||
# Build configuration. Each value keeps whatever the environment already
# provides and falls back to the default below when unset or empty.
: "${COMPILER:=mxcc}"

# Alternative default without -DRUN_FULL_TEST (smaller test set, easier debugging):
#: "${COMPILER_FLAGS:=-O3 -std=c++17 --extended-lambda}"
: "${COMPILER_FLAGS:=-O3 -std=c++17 --extended-lambda -DRUN_FULL_TEST}"

: "${INCLUDE_DIR:=cp_template}"
: "${BUILD_DIR:=build}"
|
||||
|
||||
# Compile one algorithm into $BUILD_DIR/test_<lowercased algorithm name>.
# Arguments: $1 = algorithm name, $2 = source file name (taken from cp_template/).
# Returns 0 when the compiler succeeds, 1 otherwise.
compile_algorithm() {
    local algo_name="$1"
    local source_file="$2"
    local target_file="$BUILD_DIR/test_${algo_name,,}" # lower-case the name
    local -a compiler flags compile_cmd

    print_info "编译 $algo_name 算法..."

    mkdir -p "$BUILD_DIR"

    # COMPILER and COMPILER_FLAGS are whitespace-separated word lists, so only
    # they are split. Building the command as an array keeps directory and file
    # names intact even if they contain spaces or glob characters; the previous
    # string form split and glob-expanded the whole command line.
    read -r -a compiler <<< "$COMPILER"
    read -r -a flags <<< "$COMPILER_FLAGS"
    compile_cmd=("${compiler[@]}" "${flags[@]}" "-I$INCLUDE_DIR" "cp_template/$source_file" -o "$target_file")

    print_info "执行: ${compile_cmd[*]}"

    if "${compile_cmd[@]}"; then
        print_success "$algo_name 编译完成!"
        echo ""
        echo "运行测试:"
        echo " ./$target_file [correctness|performance|all]"
        return 0
    fi

    print_error "$algo_name 编译失败!"
    return 1
}
|
||||
|
||||
# Print the effective build configuration, one setting per line.
show_build_config() {
    print_info "编译配置:"
    printf ' COMPILER: %s\n' "$COMPILER"
    printf ' COMPILER_FLAGS: %s\n' "$COMPILER_FLAGS"
    printf ' INCLUDE_DIR: %s\n' "$INCLUDE_DIR"
    printf ' BUILD_DIR: %s\n' "$BUILD_DIR"
    printf '\n'
}
|
||||
|
||||
# Run the test binary of one algorithm.
# Arguments: $1 = algorithm name, $2 = test mode (defaults to "all").
# Returns the exit status of the test binary, or 1 when it does not exist.
run_single_test() {
    local algo_name="$1"
    local test_mode="${2:-all}"
    local test_file="$BUILD_DIR/test_${algo_name,,}"

    if [ ! -f "$test_file" ]; then
        print_error "$algo_name 测试程序不存在: $test_file"
        return 1
    fi

    print_info "运行 $algo_name 测试 (模式: $test_mode)..."

    # Only a relative path needs the "./" prefix; prepending it to an absolute
    # BUILD_DIR turned the path into a non-existent relative one.
    case "$test_file" in
        /*) "$test_file" "$test_mode" ;;
        *)  "./$test_file" "$test_mode" ;;
    esac
}
|
||||
|
|
@ -0,0 +1,274 @@
|
|||
#!/bin/bash
|
||||
|
||||
# GPU高性能并行计算算法优化竞赛 - 统一编译和运行脚本
|
||||
# 整合了所有算法的编译、运行和公共配置
|
||||
|
||||
# ============================================================================
|
||||
# 公共配置和工具函数
|
||||
# ============================================================================
|
||||
|
||||
# ANSI color sequences, stored unexpanded (backslash form) and interpreted
# later when the print_* helpers echo them with -e.
RED="\033[0;31m"
GREEN="\033[0;32m"
BLUE="\033[0;34m"
YELLOW="\033[0;33m"
NC="\033[0m" # reset to the default color
|
||||
|
||||
# Logging helpers.
# print_info MESSAGE: write MESSAGE with a blue [INFO] tag.
print_info() {
    local message="$1"
    printf '%b\n' "${BLUE}[INFO]${NC} ${message}"
}
|
||||
|
||||
# print_success MESSAGE: write MESSAGE with a green [SUCCESS] tag.
print_success() {
    local message="$1"
    printf '%b\n' "${GREEN}[SUCCESS]${NC} ${message}"
}
|
||||
|
||||
# print_error MESSAGE: write MESSAGE with a red [ERROR] tag.
# Errors go to stderr so they stay visible when stdout is piped or captured.
print_error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
}
|
||||
|
||||
# print_warning MESSAGE: write MESSAGE with a yellow [WARNING] tag.
print_warning() {
    local message="$1"
    printf '%b\n' "${YELLOW}[WARNING]${NC} ${message}"
}
|
||||
|
||||
# Build configuration. Each value keeps whatever the environment already
# provides and falls back to the default below when unset or empty.
: "${COMPILER:=mxcc}"
: "${COMPILER_FLAGS:=-O3 -std=c++17 --extended-lambda -DRUN_FULL_TEST}"

# Header search directory, passed to the compiler as -I (default: utils).
: "${HEADER_DIR:=utils}"

# Source directory setting reported by show_build_config; empty by default.
: "${SOURCE_CODE_DIR:=}"

: "${BUILD_DIR:=build}"
|
||||
|
||||
# Compile one algorithm into $BUILD_DIR/test_<lowercased algorithm name>.
# Arguments: $1 = algorithm name, $2 = source file name without a directory
#            (e.g. "reduce_sum_algorithm.maca").
# Returns 0 when the compiler succeeds, 1 otherwise.
compile_algorithm() {
    local algo_name="$1"
    local source_file_name="$2"
    local target_file="$BUILD_DIR/test_${algo_name,,}" # lower-case the name
    # SOURCE_CODE_DIR was configured and displayed but never applied. Prefix it
    # when non-empty; the empty default keeps the old working-directory lookup.
    local source_path="${SOURCE_CODE_DIR:+${SOURCE_CODE_DIR%/}/}$source_file_name"
    local -a compiler flags compile_cmd

    print_info "编译 $algo_name 算法..."

    mkdir -p "$BUILD_DIR"

    # COMPILER and COMPILER_FLAGS are whitespace-separated word lists, so only
    # they are split. Building the command as an array keeps directory and file
    # names intact even if they contain spaces or glob characters; the previous
    # string form split and glob-expanded the whole command line.
    read -r -a compiler <<< "$COMPILER"
    read -r -a flags <<< "$COMPILER_FLAGS"
    compile_cmd=("${compiler[@]}" "${flags[@]}" "-I$HEADER_DIR" "$source_path" -o "$target_file")

    print_info "执行: ${compile_cmd[*]}"

    if "${compile_cmd[@]}"; then
        print_success "$algo_name 编译完成!"
        echo ""
        echo "运行测试:"
        echo " ./$target_file [correctness|performance|all]"
        return 0
    fi

    print_error "$algo_name 编译失败!"
    return 1
}
|
||||
|
||||
# Print the effective build configuration, one setting per line.
show_build_config() {
    print_info "编译配置:"
    printf ' COMPILER: %s\n' "$COMPILER"
    printf ' COMPILER_FLAGS: %s\n' "$COMPILER_FLAGS"
    printf ' HEADER_DIR: %s\n' "$HEADER_DIR"           # header search directory
    printf ' SOURCE_CODE_DIR: %s\n' "$SOURCE_CODE_DIR" # source directory
    printf ' BUILD_DIR: %s\n' "$BUILD_DIR"
    printf '\n'
}
|
||||
|
||||
# Run the test binary of one algorithm.
# Arguments: $1 = algorithm name, $2 = test mode (defaults to "all").
# Returns the exit status of the test binary, or 1 when it does not exist.
run_single_test() {
    local algo_name="$1"
    local test_mode="${2:-all}"
    local test_file="$BUILD_DIR/test_${algo_name,,}"

    if [ ! -f "$test_file" ]; then
        print_error "$algo_name 测试程序不存在: $test_file"
        return 1
    fi

    print_info "运行 $algo_name 测试 (模式: $test_mode)..."

    # Only a relative path needs the "./" prefix; prepending it to an absolute
    # BUILD_DIR turned the path into a non-existent relative one.
    case "$test_file" in
        /*) "$test_file" "$test_mode" ;;
        *)  "./$test_file" "$test_mode" ;;
    esac
}
|
||||
|
||||
# ============================================================================
|
||||
# 主脚本逻辑
|
||||
# ============================================================================
|
||||
|
||||
# Print the usage text (all options of the unified script) to stdout.
show_help() {
    cat <<EOF
GPU算法竞赛统一编译和运行脚本
用法: $0 [选项]

选项:
 --help 显示帮助信息
 --build-only 仅编译所有算法,不运行测试
 --run_reduce [MODE] 编译并运行ReduceSum算法测试 (MODE: correctness|performance|all, 默认all)
 --run_sort [MODE] 编译并运行SortPair算法测试 (MODE: correctness|performance|all, 默认all)
 --run_topk [MODE] 编译并运行TopkPair算法测试 (MODE: correctness|performance|all, 默认all)

示例:
 $0 # 编译并运行所有测试(默认行为)
 $0 --build-only # 仅编译所有算法
 $0 --run_sort performance # 编译并运行SortPair性能测试

EOF
}
|
||||
|
||||
# Parse command-line options.
RUN_MODE="run_all"           # default: build and run every algorithm
ALGO_TO_RUN=""               # algorithm selected by a --run_* option
SINGLE_ALGO_TEST_MODE="all"  # test mode used for that single algorithm

while [[ $# -gt 0 ]]; do
    case "$1" in
        --help)
            show_help
            exit 0
            ;;
        --build-only)
            RUN_MODE="build_only"
            shift
            ;;
        --run_reduce|--run_sort|--run_topk)
            RUN_MODE="run_single"
            case "$1" in
                --run_reduce) ALGO_TO_RUN="ReduceSum" ;;
                --run_sort)   ALGO_TO_RUN="SortPair" ;;
                --run_topk)   ALGO_TO_RUN="TopkPair" ;;
            esac
            # An optional MODE may follow; anything starting with "--" is the
            # next option instead.
            if [[ -n "$2" && "$2" != --* ]]; then
                SINGLE_ALGO_TEST_MODE="$2"
                shift
            fi
            shift
            ;;
        *)
            print_error "未知选项: $1"
            show_help
            exit 1
            ;;
    esac
done

if [ "$RUN_MODE" = "build_only" ]; then
    print_info "开始编译所有算法..."
else
    print_info "开始编译并运行所有算法..."
fi
print_info "工作目录: $(pwd)"
print_info "编译时间: $(date '+%Y-%m-%d %H:%M:%S')"
show_build_config

# Start from an empty build directory.
if [ -d "$BUILD_DIR" ]; then
    print_info "清理现有构建目录: $BUILD_DIR"
    rm -rf "$BUILD_DIR"
fi

# "Name:source file" pairs in the order the algorithms are processed.
ALGORITHMS=(
    "ReduceSum:reduce_sum_algorithm.maca"
    "SortPair:sort_pair_algorithm.maca"
    "TopkPair:topk_pair_algorithm.maca"
)

case "$RUN_MODE" in
    "build_only")
        print_info "编译所有算法..."

        step=0
        for entry in "${ALGORITHMS[@]}"; do
            step=$((step + 1))
            algo="${entry%%:*}"
            print_info "[$step/${#ALGORITHMS[@]}] 编译${algo}..."
            # Stop at the first algorithm that fails to build.
            if ! compile_algorithm "$algo" "${entry#*:}"; then
                print_error "${algo}编译失败"
                exit 1
            fi
        done

        print_success "所有算法编译完成!"
        echo ""
        echo "可执行文件:"
        echo " $BUILD_DIR/test_reducesum - ReduceSum算法测试"
        echo " $BUILD_DIR/test_sortpair - SortPair算法测试"
        echo " $BUILD_DIR/test_topkpair - TopkPair算法测试"
        echo ""
        echo "使用方法:"
        echo " ./$BUILD_DIR/test_reducesum [correctness|performance|all]"
        echo " ./$BUILD_DIR/test_sortpair [correctness|performance|all]"
        echo " ./$BUILD_DIR/test_topkpair [correctness|performance|all]"
        ;;

    "run_all")
        print_info "编译并运行所有算法测试..."

        # A build failure still aborts immediately. A failing test no longer
        # goes unnoticed: the remaining algorithms are run, then the script
        # exits non-zero instead of reporting success.
        tests_failed=0
        step=0
        for entry in "${ALGORITHMS[@]}"; do
            step=$((step + 1))
            algo="${entry%%:*}"
            print_info "[$step/${#ALGORITHMS[@]}] ${algo}..."
            if ! compile_algorithm "$algo" "${entry#*:}"; then
                exit 1
            fi
            run_single_test "$algo" "all" || tests_failed=1
        done

        if [ "$tests_failed" -ne 0 ]; then
            print_error "部分测试失败"
            exit 1
        fi
        print_success "所有测试完成!"
        ;;

    "run_single")
        print_info "编译并运行 ${ALGO_TO_RUN} 测试 (模式: ${SINGLE_ALGO_TEST_MODE})..."

        # Plain variable: `local` is only valid inside a function and made
        # bash print an error here.
        source_file_name=""
        for entry in "${ALGORITHMS[@]}"; do
            if [ "${entry%%:*}" = "$ALGO_TO_RUN" ]; then
                source_file_name="${entry#*:}"
            fi
        done

        if compile_algorithm "$ALGO_TO_RUN" "$source_file_name"; then
            run_single_test "$ALGO_TO_RUN" "$SINGLE_ALGO_TEST_MODE"
        else
            exit 1
        fi
        ;;
esac
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
#pragma once
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
|
||||
// ============================================================================
|
||||
// 性能计算和显示工具
|
||||
// ============================================================================
|
||||
|
||||
// Converts measured kernel times into throughput figures.
//
// All three algorithms use the same formula:
//   throughput (G elements/s) = (size / 1e9) / (time_ms / 1000)
class PerformanceCalculator {
public:
    // ReduceSum metrics.
    struct ReduceSumMetrics {
        double throughput_gps;  // G elements/s
    };

    // size: number of input elements; time_ms: elapsed time in milliseconds.
    static ReduceSumMetrics calculateReduceSum(int size, float time_ms) {
        return ReduceSumMetrics{throughputGps(size, time_ms)};
    }

    // SortPair metrics.
    struct SortPairMetrics {
        double throughput_gps;  // G elements/s
    };

    // size: number of input elements; time_ms: elapsed time in milliseconds.
    static SortPairMetrics calculateSortPair(int size, float time_ms) {
        return SortPairMetrics{throughputGps(size, time_ms)};
    }

    // TopkPair metrics.
    struct TopkPairMetrics {
        double throughput_gps;  // G elements/s
    };

    // size: number of input elements; time_ms: elapsed time in milliseconds.
    // k is accepted for interface symmetry but does not enter the formula.
    static TopkPairMetrics calculateTopkPair(int size, int /*k*/, float time_ms) {
        return TopkPairMetrics{throughputGps(size, time_ms)};
    }

private:
    // Shared formula. A non-positive (or NaN) elapsed time yields 0 instead of
    // inf/NaN, so a degenerate measurement cannot put a non-numeric value into
    // the report.
    static double throughputGps(int size, float time_ms) {
        if (!(time_ms > 0.0f)) {
            return 0.0;
        }
        return (size / 1e9) / (time_ms / 1000.0);
    }
};
|
||||
|
||||
// ============================================================================
|
||||
// 性能显示工具
|
||||
// ============================================================================
|
||||
|
||||
class PerformanceDisplay {
|
||||
public:
|
||||
// 显示ReduceSum性能表头
|
||||
static void printReduceSumHeader() {
|
||||
std::cout << "\nReduceSum 性能测试..." << std::endl;
|
||||
std::cout << "数据类型: float -> float" << std::endl;
|
||||
std::cout << "计算公式:" << std::endl;
|
||||
std::cout << " 吞吐量 = 元素数 / 时间(s) / 1e9 (G/s)" << std::endl;
|
||||
std::cout << std::setw(12) << "数据规模" << std::setw(15) << "时间(ms)"
|
||||
<< std::setw(20) << "吞吐量(G/s)" << std::endl;
|
||||
std::cout << std::string(47, '-') << std::endl;
|
||||
}
|
||||
|
||||
// 显示SortPair性能表头
|
||||
static void printSortPairHeader() {
|
||||
std::cout << "\nSortPair 性能测试..." << std::endl;
|
||||
std::cout << "数据类型: <float, uint32_t>" << std::endl;
|
||||
std::cout << "计算公式:" << std::endl;
|
||||
std::cout << " 吞吐量 = 元素数 / 时间(s) / 1e9 (G/s)" << std::endl;
|
||||
std::cout << std::setw(12) << "数据规模" << std::setw(15) << "升序(ms)" << std::setw(15) << "降序(ms)"
|
||||
<< std::setw(16) << "升序(G/s)" << std::setw(16) << "降序(G/s)" << std::endl;
|
||||
std::cout << std::string(78, '-') << std::endl;
|
||||
}
|
||||
|
||||
// 显示TopkPair性能表头
|
||||
static void printTopkPairHeader() {
|
||||
std::cout << "\nTopkPair 性能测试..." << std::endl;
|
||||
std::cout << "数据类型: <float, uint32_t>" << std::endl;
|
||||
std::cout << "计算公式:" << std::endl;
|
||||
std::cout << " 吞吐量 = 元素数 / 时间(s) / 1e9 (G/s)" << std::endl;
|
||||
}
|
||||
|
||||
static void printTopkPairDataHeader() {
|
||||
std::cout << std::setw(8) << "k值" << std::setw(15) << "升序(ms)" << std::setw(15) << "降序(ms)"
|
||||
<< std::setw(16) << "升序(G/s)" << std::setw(16) << "降序(G/s)" << std::endl;
|
||||
std::cout << std::string(74, '-') << std::endl;
|
||||
}
|
||||
|
||||
// 显示性能数据行
|
||||
static void printReduceSumData(int size, float time_ms, const PerformanceCalculator::ReduceSumMetrics& metrics) {
|
||||
std::cout << std::setw(12) << size << std::setw(15) << std::fixed << std::setprecision(3)
|
||||
<< time_ms << std::setw(20) << std::setprecision(3) << metrics.throughput_gps << std::endl;
|
||||
}
|
||||
|
||||
static void printSortPairData(int size, float asc_time, float desc_time,
|
||||
const PerformanceCalculator::SortPairMetrics& asc_metrics,
|
||||
const PerformanceCalculator::SortPairMetrics& desc_metrics) {
|
||||
std::cout << std::setw(12) << size << std::setw(15) << std::fixed << std::setprecision(3)
|
||||
<< asc_time << std::setw(15) << desc_time << std::setw(16) << std::setprecision(3)
|
||||
<< asc_metrics.throughput_gps << std::setw(16) << desc_metrics.throughput_gps << std::endl;
|
||||
}
|
||||
|
||||
static void printTopkPairData(int k, float asc_time, float desc_time,
|
||||
const PerformanceCalculator::TopkPairMetrics& asc_metrics,
|
||||
const PerformanceCalculator::TopkPairMetrics& desc_metrics) {
|
||||
std::cout << std::setw(8) << k << std::setw(15) << std::fixed << std::setprecision(3)
|
||||
<< asc_time << std::setw(15) << desc_time << std::setw(16) << std::setprecision(3)
|
||||
<< asc_metrics.throughput_gps << std::setw(16) << desc_metrics.throughput_gps << std::endl;
|
||||
}
|
||||
|
||||
// 显示性能文件保存消息
|
||||
static void printSavedMessage(const std::string& filename) {
|
||||
std::cout << "\n性能结果已保存到: " << filename << std::endl;
|
||||
}
|
||||
};
|
||||
|
|
@ -0,0 +1,234 @@
|
|||
#pragma once
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <algorithm>
|
||||
#include <mc_runtime.h>
|
||||
#include <maca_fp16.h>
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
|
||||
// 引入模块化头文件
|
||||
#include "yaml_reporter.h"
|
||||
#include "performance_utils.h"
|
||||
|
||||
// ============================================================================
|
||||
// 测试配置常量
|
||||
// ============================================================================
|
||||
// Problem sizes (element counts) used by the benchmarks. The full list is
// compiled in only when RUN_FULL_TEST is defined.
#ifdef RUN_FULL_TEST
constexpr int TEST_SIZES[] = {1000000, 134217728, 536870912, 1073741824};  // 1M, 128M, 512M, 1G
#else
constexpr int TEST_SIZES[] = {1000000, 134217728};  // 1M, 128M (reduced set)
#endif

constexpr int NUM_TEST_SIZES = sizeof(TEST_SIZES) / sizeof(TEST_SIZES[0]);

// Number of untimed warm-up runs and of timed runs per measurement.
constexpr int WARMUP_ITERATIONS = 5;
constexpr int BENCHMARK_ITERATIONS = 10;
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// 错误检查宏
|
||||
// ============================================================================
|
||||
// Evaluates a MACA runtime call exactly once; on any status other than
// mcSuccess prints the source location and mcGetErrorString() to std::cerr and
// terminates the process with exit(1).
//
// The status is kept in a macro-specific name so that a caller expression that
// itself mentions a variable called `error` is not captured by the macro's
// local, and `call` is parenthesised so any expression can be passed.
#define MACA_CHECK(call)                                                   \
    do {                                                                   \
        const mcError_t maca_check_status_ = (call);                       \
        if (maca_check_status_ != mcSuccess) {                             \
            std::cerr << "MACA error at " << __FILE__ << ":" << __LINE__   \
                      << " - " << mcGetErrorString(maca_check_status_)     \
                      << std::endl;                                        \
            exit(1);                                                       \
        }                                                                  \
    } while (0)
|
||||
|
||||
// ============================================================================
|
||||
// 测试数据生成器
|
||||
// ============================================================================
|
||||
class TestDataGenerator {
|
||||
private:
|
||||
std::mt19937 rng;
|
||||
|
||||
public:
|
||||
TestDataGenerator(uint32_t seed = 42) : rng(seed) {}
|
||||
|
||||
// 生成随机float数组
|
||||
std::vector<float> generateRandomFloats(int size, float min_val = -1000.0f, float max_val = 1000.0f) {
|
||||
std::vector<float> data(size);
|
||||
std::uniform_real_distribution<float> dist(min_val, max_val);
|
||||
for (int i = 0; i < size; i++) {
|
||||
data[i] = dist(rng);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
// 生成随机half数组
|
||||
std::vector<half> generateRandomHalfs(int size, float min_val = -100.0f, float max_val = 100.0f) {
|
||||
std::vector<half> data(size);
|
||||
std::uniform_real_distribution<float> dist(min_val, max_val);
|
||||
for (int i = 0; i < size; i++) {
|
||||
data[i] = __float2half(dist(rng));
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
// 生成随机uint32_t数组
|
||||
std::vector<uint32_t> generateRandomUint32(int size) {
|
||||
std::vector<uint32_t> data(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
data[i] = static_cast<uint32_t>(i); // 使用索引作为值,便于验证稳定排序
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
// 生成随机int64_t数组
|
||||
std::vector<int64_t> generateRandomInt64(int size) {
|
||||
std::vector<int64_t> data(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
data[i] = static_cast<int64_t>(i);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
// 生成包含NaN和Inf的测试数据 (half版本)
|
||||
std::vector<half> generateSpecialHalfs(int size) {
|
||||
std::vector<half> data = generateRandomHalfs(size, -10.0f, 10.0f);
|
||||
if (size > 100) {
|
||||
data[10] = __float2half(NAN);
|
||||
data[20] = __float2half(INFINITY);
|
||||
data[30] = __float2half(-INFINITY);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
// 生成包含NaN和Inf的测试数据 (float版本)
|
||||
std::vector<float> generateSpecialFloats(int size) {
|
||||
std::vector<float> data = generateRandomFloats(size, -10.0f, 10.0f);
|
||||
if (size > 100) {
|
||||
data[10] = NAN;
|
||||
data[20] = INFINITY;
|
||||
data[30] = -INFINITY;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// 性能测试工具
|
||||
// ============================================================================
|
||||
class PerformanceMeter {
|
||||
private:
|
||||
mcEvent_t start, stop;
|
||||
|
||||
public:
|
||||
PerformanceMeter() {
|
||||
MACA_CHECK(mcEventCreate(&start));
|
||||
MACA_CHECK(mcEventCreate(&stop));
|
||||
}
|
||||
|
||||
~PerformanceMeter() {
|
||||
mcEventDestroy(start);
|
||||
mcEventDestroy(stop);
|
||||
}
|
||||
|
||||
void startTiming() {
|
||||
MACA_CHECK(mcEventRecord(start));
|
||||
}
|
||||
|
||||
float stopTiming() {
|
||||
MACA_CHECK(mcEventRecord(stop));
|
||||
MACA_CHECK(mcEventSynchronize(stop));
|
||||
float milliseconds = 0;
|
||||
MACA_CHECK(mcEventElapsedTime(&milliseconds, start, stop));
|
||||
return milliseconds;
|
||||
}
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// 正确性验证工具
|
||||
// ============================================================================
|
||||
template<typename T>
|
||||
bool compareArrays(const std::vector<T>& a, const std::vector<T>& b, double tolerance = 1e-6) {
|
||||
if (a.size() != b.size()) return false;
|
||||
|
||||
for (size_t i = 0; i < a.size(); i++) {
|
||||
if constexpr (std::is_same_v<T, half>) {
|
||||
float fa = __half2float(a[i]);
|
||||
float fb = __half2float(b[i]);
|
||||
if (std::isnan(fa) && std::isnan(fb)) continue;
|
||||
if (std::isinf(fa) && std::isinf(fb) && (fa > 0) == (fb > 0)) continue;
|
||||
if (std::abs(fa - fb) > tolerance) return false;
|
||||
} else if constexpr (std::is_floating_point_v<T>) {
|
||||
if (std::isnan(a[i]) && std::isnan(b[i])) continue;
|
||||
if (std::isinf(a[i]) && std::isinf(b[i]) && (a[i] > 0) == (b[i] > 0)) continue;
|
||||
if (std::abs(a[i] - b[i]) > tolerance) return false;
|
||||
} else {
|
||||
if (a[i] != b[i]) return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// CPU reference implementation: stable key/value sort, in place.
//
// Sorts `keys` ascending (or descending when `descending` is true) and applies
// the same permutation to `values`. Only the keys are compared in either
// direction, so entries with equal keys keep their original relative order.
// Comparing whole pairs would instead break key ties by value, which is not a
// stable sort by key.
//
// Precondition: values.size() >= keys.size(); values[i] is read and written
// for every i < keys.size().
template<typename KeyType, typename ValueType>
void cpuSortPair(std::vector<KeyType>& keys, std::vector<ValueType>& values, bool descending) {
    std::vector<std::pair<KeyType, ValueType>> pairs;
    pairs.reserve(keys.size());
    for (size_t i = 0; i < keys.size(); i++) {
        pairs.emplace_back(keys[i], values[i]);
    }

    if (descending) {
        std::stable_sort(pairs.begin(), pairs.end(),
                         [](const auto& a, const auto& b) { return a.first > b.first; });
    } else {
        std::stable_sort(pairs.begin(), pairs.end(),
                         [](const auto& a, const auto& b) { return a.first < b.first; });
    }

    for (size_t i = 0; i < pairs.size(); i++) {
        keys[i] = pairs[i].first;
        values[i] = pairs[i].second;
    }
}
|
||||
|
||||
// CPU reference implementation: top-k selection over key/value pairs.
//
// Stable-sorts all pairs by key (descending when `descending` is true,
// ascending otherwise) and copies the first k pairs to keys_out / values_out.
// Only keys are compared, so equal keys keep their input order.
//
// `k` is clamped to [0, keys_in.size()]: a negative k yields empty outputs and
// a k larger than the input yields every element, instead of reading past the
// end of the sorted pairs.
//
// Precondition: values_in.size() >= keys_in.size().
template<typename KeyType, typename ValueType>
void cpuTopkPair(const std::vector<KeyType>& keys_in, const std::vector<ValueType>& values_in,
                 std::vector<KeyType>& keys_out, std::vector<ValueType>& values_out,
                 int k, bool descending) {
    std::vector<std::pair<KeyType, ValueType>> pairs;
    pairs.reserve(keys_in.size());
    for (size_t i = 0; i < keys_in.size(); i++) {
        pairs.emplace_back(keys_in[i], values_in[i]);
    }

    if (descending) {
        std::stable_sort(pairs.begin(), pairs.end(),
                         [](const auto& a, const auto& b) { return a.first > b.first; });
    } else {
        std::stable_sort(pairs.begin(), pairs.end(),
                         [](const auto& a, const auto& b) { return a.first < b.first; });
    }

    // Never emit more elements than exist, and treat negative k as zero.
    const size_t count = k <= 0 ? 0 : std::min(static_cast<size_t>(k), pairs.size());

    keys_out.resize(count);
    values_out.resize(count);
    for (size_t i = 0; i < count; i++) {
        keys_out[i] = pairs[i].first;
        values_out[i] = pairs[i].second;
    }
}
|
||||
|
||||
// CPU参考实现 - ReduceSum (使用double精度)
|
||||
template<typename InputT>
|
||||
double cpuReduceSum(const std::vector<InputT>& data, double init_value) {
|
||||
double sum = init_value;
|
||||
for (const auto& val : data) {
|
||||
if constexpr (std::is_same_v<InputT, half>) {
|
||||
float f_val = __half2float(val);
|
||||
if (!std::isnan(f_val)) {
|
||||
sum += static_cast<double>(f_val);
|
||||
}
|
||||
} else {
|
||||
if (!std::isnan(val)) {
|
||||
sum += static_cast<double>(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
|
@ -0,0 +1,154 @@
|
|||
#pragma once
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
// ============================================================================
|
||||
// YAML性能报告生成器
|
||||
// ============================================================================
|
||||
|
||||
// Writes benchmark results for the ReduceSum, SortPair and TopkPair tests as
// YAML files.
//
// Each generator takes one string->string map per measured configuration.
// Required keys are looked up with std::map::at, so a missing key throws
// std::out_of_range. Failure to open the output file is not reported: the
// writes are simply dropped by the stream.
//
// Layout of every report:
//   # <title> / # <timestamp>
//   algorithm: "<name>"
//   data_types:        (two nested entries)
//   formulas:          (one nested entry)
//   performance_data:  (one list item per map, nested keys indented below it)
class YAMLPerformanceReporter {
public:
    // Generic description of one algorithm's results. Not used by the
    // generators in this class.
    struct PerformanceData {
        std::string algorithm;
        std::string input_type;
        std::string output_type;
        std::string key_type;
        std::string value_type;
        std::vector<std::map<std::string, std::string>> metrics;
    };

    // Returns an empty entry for the caller to fill in.
    static std::map<std::string, std::string> createEntry() {
        return std::map<std::string, std::string>();
    }

    // Writes the ReduceSum report.
    // Required keys per entry: data_size, time_ms, throughput_gps, data_type.
    static void generateReduceSumYAML(const std::vector<std::map<std::string, std::string>>& perf_data,
                                      const std::string& filename = "reduce_sum_performance.yaml") {
        std::ofstream yaml_file(filename);
        writePreamble(yaml_file, "ReduceSum算法性能测试结果", "ReduceSum",
                      "input", "float", "output", "float");

        for (const auto& data : perf_data) {
            yaml_file << "  - data_size: " << data.at("data_size") << "\n";
            yaml_file << "    time_ms: " << formatFloat(data.at("time_ms")) << "\n";
            yaml_file << "    throughput_gps: " << formatFloat(data.at("throughput_gps")) << "\n";
            yaml_file << "    data_type: \"" << data.at("data_type") << "\"\n";
        }

        yaml_file.close();
    }

    // Writes the SortPair report.
    // Required keys per entry: data_size, asc_time_ms, asc_throughput_gps,
    // desc_time_ms, desc_throughput_gps, key_type, value_type.
    static void generateSortPairYAML(const std::vector<std::map<std::string, std::string>>& perf_data,
                                     const std::string& filename = "sort_pair_performance.yaml") {
        std::ofstream yaml_file(filename);
        writePreamble(yaml_file, "SortPair算法性能测试结果", "SortPair",
                      "key_type", "float", "value_type", "uint32_t");

        for (const auto& data : perf_data) {
            yaml_file << "  - data_size: " << data.at("data_size") << "\n";
            writeDirection(yaml_file, "ascending", "asc", data);
            writeDirection(yaml_file, "descending", "desc", data);
            writeKeyValueTypes(yaml_file, data);
        }

        yaml_file.close();
    }

    // Writes the TopkPair report.
    // Required keys per entry: data_size, k_value, asc_time_ms,
    // asc_throughput_gps, desc_time_ms, desc_throughput_gps, key_type,
    // value_type.
    static void generateTopkPairYAML(const std::vector<std::map<std::string, std::string>>& perf_data,
                                     const std::string& filename = "topk_pair_performance.yaml") {
        std::ofstream yaml_file(filename);
        writePreamble(yaml_file, "TopkPair算法性能测试结果", "TopkPair",
                      "key_type", "float", "value_type", "uint32_t");

        for (const auto& data : perf_data) {
            yaml_file << "  - data_size: " << data.at("data_size") << "\n";
            yaml_file << "    k_value: " << data.at("k_value") << "\n";
            writeDirection(yaml_file, "ascending", "asc", data);
            writeDirection(yaml_file, "descending", "desc", data);
            writeKeyValueTypes(yaml_file, data);
        }

        yaml_file.close();
    }

private:
    using Entry = std::map<std::string, std::string>;

    // Writes the two comment lines at the top of a report: the title and the
    // local time of generation, followed by a blank line.
    static void writeHeader(std::ofstream& file, const std::string& title) {
        file << "# " << title << "\n";
        file << "# 生成时间: ";

        const auto now_c = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
        // std::localtime may return null; std::put_time must not be given a
        // null pointer, so the timestamp is skipped in that case.
        if (const auto* local = std::localtime(&now_c)) {
            file << std::put_time(local, "%Y-%m-%d %H:%M:%S");
        }
        file << "\n\n";
    }

    // Writes everything that precedes the list items: header comments,
    // algorithm name, the two data_types entries, the throughput formula and
    // the opening "performance_data:" key.
    static void writePreamble(std::ofstream& file, const std::string& title,
                              const std::string& algorithm,
                              const std::string& first_label, const std::string& first_type,
                              const std::string& second_label, const std::string& second_type) {
        writeHeader(file, title);
        file << "algorithm: \"" << algorithm << "\"\n";
        file << "data_types:\n";
        file << "  " << first_label << ": \"" << first_type << "\"\n";
        file << "  " << second_label << ": \"" << second_type << "\"\n";
        file << "formulas:\n";
        file << "  throughput: \"elements / time(s) / 1e9 (G/s)\"\n";
        file << "performance_data:\n";
    }

    // Writes one "ascending:" / "descending:" sub-mapping of a list item.
    // `prefix` selects the entry keys "<prefix>_time_ms" and
    // "<prefix>_throughput_gps".
    static void writeDirection(std::ofstream& file, const std::string& heading,
                               const std::string& prefix, const Entry& entry) {
        file << "    " << heading << ":\n";
        file << "      time_ms: " << formatFloat(entry.at(prefix + "_time_ms")) << "\n";
        file << "      throughput_gps: " << formatFloat(entry.at(prefix + "_throughput_gps")) << "\n";
    }

    // Writes the quoted key_type / value_type fields of a list item.
    static void writeKeyValueTypes(std::ofstream& file, const Entry& entry) {
        file << "    key_type: \"" << entry.at("key_type") << "\"\n";
        file << "    value_type: \"" << entry.at("value_type") << "\"\n";
    }

    // Re-renders a numeric string with six digits after the decimal point.
    // If std::stod cannot parse the text (or throws for any other reason) the
    // input is returned unchanged, so non-numeric placeholders pass through.
    static std::string formatFloat(const std::string& value) {
        try {
            const double parsed = std::stod(value);
            std::ostringstream oss;
            oss << std::fixed << std::setprecision(6) << parsed;
            return oss.str();
        } catch (...) {
            return value;
        }
    }
};
|
||||
Loading…
Reference in New Issue