diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..503f6e4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+.DS_Store
+*.bak
+*.pyc
+*.o
+**/build/
+cp_template/*.yaml
\ No newline at end of file
diff --git a/README.md b/README.md
index 1c017fd..cc999fe 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,52 @@
 
 ---
 
-## 📥 如何参与提交？
+## 🚀 快速上手
+
+本竞赛旨在评估参赛者在 GPU 并行计算领域的算法优化能力。为便于快速进入比赛状态，参赛者可选择实现以下三个核心算法的高性能版本：
+- **ReduceSum**: 高精度归约求和
+- **SortPair**: 键值对稳定排序
+- **TopkPair**: 键值对TopK选择
+
+> 📥 以下命令均需在 `cp_template/` 目录（或由其复制得到的提交目录）下执行。
+
+### 编译和测试
+
+#### 1. 全量编译和运行
+```bash
+# 编译并运行所有算法测试（默认行为）
+./run.sh
+
+# 仅编译所有算法，不运行测试
+./run.sh --build-only
+
+# 编译并运行单个算法测试
+./run.sh --run_reduce   # ReduceSum算法
+./run.sh --run_sort     # SortPair算法
+./run.sh --run_topk     # TopkPair算法
+```
+
+#### 2. 单独编译和运行
+```bash
+# 编译并运行ReduceSum算法（默认行为）
+./run_reduce_sum.sh
+
+# 仅编译ReduceSum算法，不运行测试
+./run_reduce_sum.sh --build-only
+
+# 编译并运行SortPair正确性测试
+./run_sort_pair.sh --run correctness
+
+# 编译并运行TopkPair性能测试
+./run_topk_pair.sh --run performance
+```
+
+#### 3. 手动运行测试
+```bash
+./build/test_reducesum [correctness|performance|all]
+./build/test_sortpair [correctness|performance|all]
+./build/test_topkpair [correctness|performance|all]
+```
 
 ### ✅ 参赛要求：
 - 提交内容必须可以在沐曦自研 GPU **曦云 C500** 上运行。
@@ -72,7 +117,7 @@
 
 ## 🏅 排名规则
 
-- 比赛周期：2 个月  
+- 比赛周期：2 个月
 - 排名按累计得分排序，取前 12 名！
 
 若得分相同：
diff --git a/S1/ICTN0N/build/test_reducesum b/S1/ICTN0N/build/test_reducesum
new file mode 100755
index 0000000..7d95691
Binary files /dev/null and b/S1/ICTN0N/build/test_reducesum differ
diff --git a/S1/ICTN0N/build/test_sortpair b/S1/ICTN0N/build/test_sortpair
new file mode 100755
index 0000000..67cb03c
Binary files /dev/null and b/S1/ICTN0N/build/test_sortpair differ
diff --git a/S1/ICTN0N/build/test_topkpair b/S1/ICTN0N/build/test_topkpair
new file mode 100755
index 0000000..ee5a5d7
Binary files /dev/null and b/S1/ICTN0N/build/test_topkpair differ
diff --git a/S1/ICTN0N/reduce_sum_performance.yaml b/S1/ICTN0N/reduce_sum_performance.yaml
new file mode 100644
index 0000000..1e8822a
--- /dev/null
+++ b/S1/ICTN0N/reduce_sum_performance.yaml
@@ -0,0 +1,26 @@
+# ReduceSum算法性能测试结果
+# 生成时间: 2025-09-03 22:34:18
+
+algorithm: "ReduceSum"
+data_types:
+  input: "float"
+  output: "float"
+formulas:
+  throughput: "elements / time(s) / 1e9 (G/s)"
+performance_data:
+  - data_size: 1000000
+    time_ms: 0.048717
+    throughput_gps: 20.526799
+    data_type: "float"
+  - data_size: 134217728
+    time_ms: 0.402560
+    throughput_gps: 333.410496
+    data_type: "float"
+  - data_size: 536870912
+    time_ms: 1.346586
+    throughput_gps: 398.690510
+    data_type: "float"
+  - data_size: 1073741824
+    time_ms: 2.639513
+    throughput_gps: 406.795353
+    data_type: "float"
diff --git a/S1/ICTN0N/sort_pair_performance.yaml b/S1/ICTN0N/sort_pair_performance.yaml
new file mode 100644
index 0000000..9af8853
--- /dev/null
+++ b/S1/ICTN0N/sort_pair_performance.yaml
@@ -0,0 +1,46 @@
+# SortPair算法性能测试结果
+# 生成时间: 2025-09-03 22:37:18
+
+algorithm: "SortPair"
+data_types:
+  key_type: "float"
+  value_type: "uint32_t"
+formulas:
+  throughput: "elements / time(s) / 1e9 (G/s)"
+performance_data:
+  - data_size: 1000000
+    ascending:
+      time_ms: 0.351488
+      throughput_gps: 2.845047
+    descending:
+      time_ms: 0.343270
+      throughput_gps: 2.913155
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 134217728
+    ascending:
+      time_ms: 22.273815
+      throughput_gps: 6.025808
+    descending:
+      time_ms: 22.494003
+      throughput_gps: 5.966823
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 536870912
+    ascending:
+      time_ms: 88.856277
+      throughput_gps: 6.042014
+    descending:
+      time_ms: 89.913918
+      throughput_gps: 5.970943
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 1073741824
+    ascending:
+      time_ms: 181.409576
+      throughput_gps: 5.918882
+    descending:
+      time_ms: 183.428955
+      throughput_gps: 5.853720
+    key_type: "float"
+    value_type: "uint32_t"
diff --git a/S1/ICTN0N/topk_pair_performance.yaml b/S1/ICTN0N/topk_pair_performance.yaml
new file mode 100644
index 0000000..f8dab18
--- /dev/null
+++ b/S1/ICTN0N/topk_pair_performance.yaml
@@ -0,0 +1,210 @@
+# TopkPair算法性能测试结果
+# 生成时间: 2025-09-03 22:40:54
+
+algorithm: "TopkPair"
+data_types:
+  key_type: "float"
+  value_type: "uint32_t"
+formulas:
+  throughput: "elements / time(s) / 1e9 (G/s)"
+performance_data:
+  - data_size: 1000000
+    k_value: 32
+    ascending:
+      time_ms: 0.402509
+      throughput_gps: 2.484418
+    descending:
+      time_ms: 0.416307
+      throughput_gps: 2.402072
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 1000000
+    k_value: 50
+    ascending:
+      time_ms: 0.404787
+      throughput_gps: 2.470434
+    descending:
+      time_ms: 0.414669
+      throughput_gps: 2.411563
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 1000000
+    k_value: 100
+    ascending:
+      time_ms: 0.398336
+      throughput_gps: 2.510443
+    descending:
+      time_ms: 0.408320
+      throughput_gps: 2.449060
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 1000000
+    k_value: 256
+    ascending:
+      time_ms: 0.410752
+      throughput_gps: 2.434559
+    descending:
+      time_ms: 0.403379
+      throughput_gps: 2.479057
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 1000000
+    k_value: 1024
+    ascending:
+      time_ms: 0.391091
+      throughput_gps: 2.556949
+    descending:
+      time_ms: 0.391142
+      throughput_gps: 2.556613
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 134217728
+    k_value: 32
+    ascending:
+      time_ms: 22.394062
+      throughput_gps: 5.993452
+    descending:
+      time_ms: 22.263729
+      throughput_gps: 6.028538
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 134217728
+    k_value: 50
+    ascending:
+      time_ms: 22.379187
+      throughput_gps: 5.997435
+    descending:
+      time_ms: 22.228352
+      throughput_gps: 6.038132
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 134217728
+    k_value: 100
+    ascending:
+      time_ms: 22.436581
+      throughput_gps: 5.982094
+    descending:
+      time_ms: 22.229326
+      throughput_gps: 6.037868
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 134217728
+    k_value: 256
+    ascending:
+      time_ms: 22.463232
+      throughput_gps: 5.974996
+    descending:
+      time_ms: 22.319946
+      throughput_gps: 6.013354
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 134217728
+    k_value: 1024
+    ascending:
+      time_ms: 22.468454
+      throughput_gps: 5.973608
+    descending:
+      time_ms: 22.335976
+      throughput_gps: 6.009038
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 536870912
+    k_value: 32
+    ascending:
+      time_ms: 89.437294
+      throughput_gps: 6.002763
+    descending:
+      time_ms: 88.605972
+      throughput_gps: 6.059083
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 536870912
+    k_value: 50
+    ascending:
+      time_ms: 89.460587
+      throughput_gps: 6.001200
+    descending:
+      time_ms: 88.546509
+      throughput_gps: 6.063152
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 536870912
+    k_value: 100
+    ascending:
+      time_ms: 89.203011
+      throughput_gps: 6.018529
+    descending:
+      time_ms: 88.809097
+      throughput_gps: 6.045224
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 536870912
+    k_value: 256
+    ascending:
+      time_ms: 89.500465
+      throughput_gps: 5.998526
+    descending:
+      time_ms: 88.743912
+      throughput_gps: 6.049665
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 536870912
+    k_value: 1024
+    ascending:
+      time_ms: 89.405357
+      throughput_gps: 6.004908
+    descending:
+      time_ms: 88.446083
+      throughput_gps: 6.070036
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 1073741824
+    k_value: 32
+    ascending:
+      time_ms: 182.233307
+      throughput_gps: 5.892127
+    descending:
+      time_ms: 181.076950
+      throughput_gps: 5.929754
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 1073741824
+    k_value: 50
+    ascending:
+      time_ms: 182.273239
+      throughput_gps: 5.890836
+    descending:
+      time_ms: 180.944550
+      throughput_gps: 5.934093
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 1073741824
+    k_value: 100
+    ascending:
+      time_ms: 182.374191
+      throughput_gps: 5.887576
+    descending:
+      time_ms: 181.277100
+      throughput_gps: 5.923207
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 1073741824
+    k_value: 256
+    ascending:
+      time_ms: 182.349457
+      throughput_gps: 5.888374
+    descending:
+      time_ms: 181.248199
+      throughput_gps: 5.924152
+    key_type: "float"
+    value_type: "uint32_t"
+  - data_size: 1073741824
+    k_value: 1024
+    ascending:
+      time_ms: 182.378326
+      throughput_gps: 5.887442
+    descending:
+      time_ms: 181.025803
+      throughput_gps: 5.931430
+    key_type: "float"
+    value_type: "uint32_t"
diff --git a/cp_guide.md b/cp_run_guide.md
similarity index 51%
rename from cp_guide.md
rename to cp_run_guide.md
index 3385ae0..b7c4ca2 100644
--- a/cp_guide.md
+++ b/cp_run_guide.md
@@ -1,59 +1,12 @@
 # GPU 高性能并行计算算法优化竞赛
 
-## 🎯 竞赛概述
-
-本竞赛旨在评估参赛者在GPU并行计算领域的算法优化能力。参赛者可选择实现三个核心算法的高性能版本：
-- **ReduceSum**: 高精度归约求和
-- **SortPair**: 键值对稳定排序
-- **TopkPair**: 键值对TopK选择
-
-## 🚀 快速开始
-
-### 编译和测试
-
-#### 1. 全量编译和运行
-```bash
-# 编译并运行所有算法测试（默认行为）
-./build_and_run.sh
-
-# 仅编译所有算法，不运行测试
-./build_and_run.sh --build-only
-
-# 编译并运行单个算法测试
-./build_and_run.sh --run_reduce   # ReduceSum算法
-./build_and_run.sh --run_sort     # SortPair算法
-./build_and_run.sh --run_topk     # TopkPair算法
-```
-
-#### 2. 单独编译和运行
-```bash
-# 编译并运行ReduceSum算法（默认行为）
-./build_and_run_reduce_sum.sh
-
-# 仅编译ReduceSum算法，不运行测试
-./build_and_run_reduce_sum.sh --build-only
-
-# 编译并运行SortPair正确性测试
-./build_and_run_sort_pair.sh --run correctness
-
-# 编译并运行TopkPair性能测试
-./build_and_run_topk_pair.sh --run performance
-```
-
-#### 3. 手动运行测试
-```bash
-./build/test_reducesum [correctness|performance|all]
-./build/test_sortpair [correctness|performance|all]
-./build/test_topkpair [correctness|performance|all]
-```
-
 ## 📝 参赛指南
 
 ### 实现位置
 参赛者需要在以下文件中替换Thrust实现：
-- `src/reduce_sum_algorithm.maca` - 替换Thrust归约求和
-- `src/sort_pair_algorithm.maca` - 替换Thrust稳定排序
-- `src/topk_pair_algorithm.maca` - 替换Thrust TopK选择
+- `reduce_sum_algorithm.maca` - 替换Thrust归约求和
+- `sort_pair_algorithm.maca` - 替换Thrust稳定排序
+- `topk_pair_algorithm.maca` - 替换Thrust TopK选择
 
 ### 算法要求
 见competition_parallel_algorithms.md
@@ -92,25 +45,21 @@
 - 各数据规模的详细性能数据
 - 升序/降序分别统计（适用时）
 
-## 📁 项目结构
+## 📁 提交内容结构
 
 ```
-├── build_and_run.sh                # 统一编译和运行脚本（默认编译+运行所有算法）
-├── build_common.sh                  # 公共编译配置和函数
-├── build_and_run_reduce_sum.sh     # ReduceSum独立编译和运行脚本
-├── build_and_run_sort_pair.sh      # SortPair独立编译和运行脚本
-├── build_and_run_topk_pair.sh      # TopkPair独立编译和运行脚本
+├── run.sh                # 统一编译和运行脚本（默认编译+运行所有算法）
 ├── competition_parallel_algorithms.md  # 详细题目说明
-├── src/                            # 算法实现和工具文件
-│   ├── reduce_sum_algorithm.maca     # 1. ReduceSum测试程序
-│   ├── sort_pair_algorithm.maca      # 2. SortPair测试程序
-│   ├── topk_pair_algorithm.maca      # 3. TopkPair测试程序
+├── reduce_sum_algorithm.maca     # 1. ReduceSum测试程序
+├── sort_pair_algorithm.maca      # 2. SortPair测试程序
+├── topk_pair_algorithm.maca      # 3. TopkPair测试程序
+├── utils/                        # 工具文件
 │   ├── test_utils.h               # 测试工具和CPU参考实现
 │   ├── yaml_reporter.h            # YAML性能报告生成器
 │   └── performance_utils.h        # 性能测试工具
-├── final_results/reduce_sum_results.yaml  #ReduceSum性能数据
-├── final_results/sort_pair_results.yaml   #替换Thrust稳定排序
-└── final_results/topk_pair_results.yaml   #TopkPair性能数据         
+├── reduce_sum_results.yaml        #ReduceSum性能数据
+├── sort_pair_results.yaml         #SortPair性能数据
+└── topk_pair_results.yaml         #TopkPair性能数据         
 ```
 
 ## 🔧 开发工具
@@ -134,7 +83,7 @@ mxcc -O3 -std=c++17 --extended-lambda -Isrc
 |--------|--------|------|
 | `COMPILER` | `mxcc` | CUDA编译器路径 |
 | `COMPILER_FLAGS` | `-O3 -std=c++17 --extended-lambda` | 编译标志 |
-| `INCLUDE_DIR` | `src` | 头文件目录 |
+| `HEADER_DIR` | `utils` | 头文件目录 |
 | `BUILD_DIR` | `build` | 构建输出目录 |
 
 ### 调试模式
diff --git a/competition_parallel_algorithms.md b/cp_template/competition_parallel_algorithms.md
similarity index 99%
rename from competition_parallel_algorithms.md
rename to cp_template/competition_parallel_algorithms.md
index 6cf1efd..70bf630 100644
--- a/competition_parallel_algorithms.md
+++ b/cp_template/competition_parallel_algorithms.md
@@ -1,11 +1,11 @@
-# 题目：
+# 样例赛题说明
+
 ## GPU高性能并行计算算法优化
 
 要求参赛者通过一个或多个global kernel 函数（允许配套 device 辅助函数），实现高性能算法。
 
 在正确性、稳定性前提下，比拼算法性能。
 
-
 # 1. ReduceSum算法优化
 ```cpp
 template <typename InputT = float, typename OutputT = float>
@@ -23,14 +23,12 @@ public:
 * 系统将测试评估1M, 128M, 512M, 1G element number下的算法性能
 * 假定输入d\_in数据量为num\_items
 
-
 注意事项
 
 * 累计误差不大于cpu double golden基准的0.5%
 * 注意针对NAN和INF等异常值的处理
 
 
-
 加分项
 
 * 使用tensor core计算reduce
@@ -62,14 +60,11 @@ public:
 * 需要校验结果正确性
 * 结果必须稳定排序
 
-
 加分项
 
 * 支持其他不同数据类型的排序，如half、double、int32_t等
 * 覆盖更全面的数据范围，提供良好稳定的性能表现
 
-
-
 # 3. Topk Pair算法优化
 ```cpp
 template <typename KeyType, typename ValueType>
@@ -95,7 +90,6 @@ public:
 
 * 结果必须稳定排序
 
-
 加分项
 
 * 支持其他不同数据类型的键值对，实现类型通用算法
diff --git a/run.sh b/cp_template/run.sh
similarity index 99%
rename from run.sh
rename to cp_template/run.sh
index d6b612a..a437ff8 100644
--- a/run.sh
+++ b/cp_template/run.sh
@@ -36,11 +36,11 @@ COMPILER=${COMPILER:-mxcc}
 COMPILER_FLAGS=${COMPILER_FLAGS:-"-O3 -std=c++17 --extended-lambda -DRUN_FULL_TEST"} 
 
 # ***** 这里是关键修改点1：头文件目录 *****
-# 现在头文件在 includes/ 目录下
+# 现在头文件在 utils/ 目录下
 HEADER_DIR=${HEADER_DIR:-utils} 
 
 # ***** 这里是关键修改点2：源文件目录 *****
-# 现在源文件在 algorithms/ 目录下
+# 现在源文件直接位于当前目录（./）下，因此 SOURCE_CODE_DIR 默认为空
 SOURCE_CODE_DIR=${SOURCE_CODE_DIR:-} 
 
 BUILD_DIR=${BUILD_DIR:-build}
diff --git a/utils/performance_utils.h b/cp_template/utils/performance_utils.h
similarity index 100%
rename from utils/performance_utils.h
rename to cp_template/utils/performance_utils.h
diff --git a/utils/test_utils.h b/cp_template/utils/test_utils.h
similarity index 100%
rename from utils/test_utils.h
rename to cp_template/utils/test_utils.h
diff --git a/utils/yaml_reporter.h b/cp_template/utils/yaml_reporter.h
similarity index 100%
rename from utils/yaml_reporter.h
rename to cp_template/utils/yaml_reporter.h