slurm init

This commit is contained in:
zhouqunjie 2022-10-14 08:14:23 +08:00
parent 1b0c2119cb
commit 8ae093d187
30 changed files with 3590 additions and 0 deletions

View File

@ -0,0 +1,121 @@
/* These are some extra functions to work with Slurm in Go.
** They are separated, since they don't use the Slurm API
** but wrap around the Slurm command-line tools. */
package extra
import (
"fmt"
"os"
"os/exec"
"strings"
"errors"
"path/filepath"
"slurm/jobinfo"
"slurm"
"strconv"
)
var slurm_path string
func find_slurm_path () {
var err error
var path string
path=os.Getenv("SLURM_PATH")
if path == "" {
path, err = exec.LookPath("sinfo")
if err != nil {
fmt.Printf("could not find slurm executables\nEither add the slurm binaries to your PATH or define SLURM_PATH\n")
} else {
slurm_path=strings.TrimSuffix(path, "bin/sinfo")
}
} else {
test_path := filepath.Join(path, "bin/sinfo")
_, err := os.Stat(test_path)
if os.IsNotExist(err) {
fmt.Printf("Slurm executable sinfo does not exist at %s\n", test_path)
} else {
slurm_path = path
}
}
}
func Cancel_job( JobId uint32) error{
find_slurm_path()
if slurm_path == "" {
return errors.New("Cannot find slurm executable")
}
job_list := job_info.Get_job(JobId)
if job_list.Error_code != 0 {
msg := slurm.GetErrorString(job_list.Error_code)
fmt.Print(msg)
return errors.New(msg)
}
path := filepath.Join(slurm_path,"bin","scancel")
cmd := exec.Command(path, strconv.FormatInt(int64(JobId), 10))
fmt.Print(cmd.String())
out, err := cmd.CombinedOutput()
if err!= nil {
msg := string(out) + err.Error()
return errors.New(msg)
}
return nil
}
type Acc_Job_info struct {
JobId uint32;
User string;
Account string;
State string;
JobName string;
}
var sacct_format_string string
func parse_sacct_output(input string) []Acc_Job_info {
var job_list []Acc_Job_info
lines := strings.Split(string(input), "\n")
fmt.Printf("len %d\n",len(lines)-1)
for l := range lines {
var job_info Acc_Job_info
elements := strings.Split(lines[l], "|")
if len(elements) < 5 {
break //Well, this is not clean, but keep it like this for Now
}
id, ierr := strconv.Atoi(elements[0])
if ierr != nil {
continue // not a job entry but a step like "323.batch"; ignore these for now
}
job_info.JobId =uint32(id)
job_info.User = elements[1]
job_info.Account = elements[2]
job_info.State = elements[3]
job_info.JobName =elements[4]
job_list = append(job_list, job_info)
}
return job_list
}
func Get_job_info_accounting(JobId uint32 ) ([]Acc_Job_info, error) {
sacct_format_string = "JobId,user,account,state,JobName"
find_slurm_path()
if slurm_path == "" {
return nil, errors.New("Cannot find slurm executable")
}
path := filepath.Join(slurm_path,"bin","sacct")
cmd:= exec.Command(path, "-j", strconv.FormatInt(int64(JobId), 10),"--format", sacct_format_string,"-p","-n")
//fmt.Printf(cmd.String())
out, err := cmd.CombinedOutput()
if err!= nil {
msg := string(out) + err.Error()
return nil, errors.New(msg)
}
list := parse_sacct_output(string(out))
return list, nil
}

File diff suppressed because it is too large

View File

@ -0,0 +1,278 @@
package node_info
/*
#cgo LDFLAGS: -lslurm
#include<stdlib.h>
#include<slurm/slurm.h>
#include<slurm/slurm_errno.h>
inline uint8_t uint8_ptr(uint8_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
inline int8_t int8_ptr(int8_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
uint16_t uint16_ptr(uint16_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
inline int16_t int16_ptr(int16_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
inline uint32_t uint32_ptr(uint32_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
inline int32_t int32_ptr(int32_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
inline uint64_t uint64_ptr(uint64_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
inline int64_t int64_ptr(int64_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
struct node_info_msg *get_node_info(){
struct node_info_msg* node_buffer;
if(slurm_load_node ((time_t) NULL,
&node_buffer, SHOW_ALL))
return NULL;
return node_buffer;
}
struct node_info_msg *get_single_node_info(char* name){
struct node_info_msg* node_buffer;
if( slurm_load_node_single (&node_buffer, name, SHOW_DETAIL))
return NULL;
return node_buffer;
}
struct node_info* node_from_list(struct node_info_msg *list, int i){
return &list->node_array[i];
}
void free_node_buffer(void* buffer){
slurm_free_node_info_msg ((struct node_info_msg*)buffer);
}
*/
import "C"
import "fmt"
import "unsafe"
type Node_info struct {
Arch string;
Boards uint16;
Boot_time int64;
Cluster_name string;
Cores uint16;
Core_spec_cnt uint16;
Cpu_bind uint32;
Cpu_load uint32;
Free_mem uint64;
Cpus uint16;
Cpu_spec_list string;
Features string;
Features_act string;
Gres string;
Gres_drain string;
Gres_used string;
Mcs_label string;
Mem_spec_limit uint64;
Name string;
Next_state uint32;
Node_addr string;
Node_hostname string;
Node_state uint32;
Os string;
Owner uint32;
Partitions string;
Port uint16;
Real_memory uint64;
Reason string;
Reason_time int64;
Reason_uid uint32;
Slurmd_start_time int64;
Sockets uint16;
Threads uint16;
Tmp_disk uint32;
Weight uint32;
Tres_fmt_str string;
Version string;
}
func Node_info_convert_c_to_go(c_struct *C.struct_node_info) Node_info{
var go_struct Node_info
go_struct.Arch = C.GoString(c_struct.arch)
go_struct.Boards = uint16(c_struct.boards)
go_struct.Boot_time = int64(c_struct.boot_time)
go_struct.Cluster_name = C.GoString(c_struct.cluster_name)
go_struct.Cores = uint16(c_struct.cores)
go_struct.Core_spec_cnt = uint16(c_struct.core_spec_cnt)
go_struct.Cpu_bind = uint32(c_struct.cpu_bind)
go_struct.Cpu_load = uint32(c_struct.cpu_load)
go_struct.Free_mem = uint64(c_struct.free_mem)
go_struct.Cpus = uint16(c_struct.cpus)
go_struct.Cpu_spec_list = C.GoString(c_struct.cpu_spec_list)
go_struct.Features = C.GoString(c_struct.features)
go_struct.Features_act = C.GoString(c_struct.features_act)
go_struct.Gres = C.GoString(c_struct.gres)
go_struct.Gres_drain = C.GoString(c_struct.gres_drain)
go_struct.Gres_used = C.GoString(c_struct.gres_used)
go_struct.Mcs_label = C.GoString(c_struct.mcs_label)
go_struct.Mem_spec_limit = uint64(c_struct.mem_spec_limit)
go_struct.Name = C.GoString(c_struct.name)
go_struct.Next_state = uint32(c_struct.next_state)
go_struct.Node_addr = C.GoString(c_struct.node_addr)
go_struct.Node_hostname = C.GoString(c_struct.node_hostname)
go_struct.Node_state = uint32(c_struct.node_state)
go_struct.Os = C.GoString(c_struct.os)
go_struct.Owner = uint32(c_struct.owner)
go_struct.Partitions = C.GoString(c_struct.partitions)
go_struct.Port = uint16(c_struct.port)
go_struct.Real_memory = uint64(c_struct.real_memory)
go_struct.Reason = C.GoString(c_struct.reason)
go_struct.Reason_time = int64(c_struct.reason_time)
go_struct.Reason_uid = uint32(c_struct.reason_uid)
go_struct.Slurmd_start_time = int64(c_struct.slurmd_start_time)
go_struct.Sockets = uint16(c_struct.sockets)
go_struct.Threads = uint16(c_struct.threads)
go_struct.Tmp_disk = uint32(c_struct.tmp_disk)
go_struct.Weight = uint32(c_struct.weight)
go_struct.Tres_fmt_str = C.GoString(c_struct.tres_fmt_str)
go_struct.Version = C.GoString(c_struct.version)
return go_struct
}
func State_to_string(state uint32) string{
switch s := C.uint16_t(state); s {
case C.NODE_STATE_UNKNOWN:
return "node state unknown"
case C.NODE_STATE_DOWN:
return "node state down"
case C.NODE_STATE_IDLE:
return "node state idle"
case C.NODE_STATE_ALLOCATED:
return "node state allocated"
case C.NODE_STATE_ERROR:
return "node state error"
case C.NODE_STATE_MIXED:
return "node state mixed"
case C.NODE_STATE_FUTURE:
return "node state future"
case C.NODE_STATE_END:
return "node state end"
}
return "Unknown state"
}
func Print_node_info(go_struct Node_info){
fmt.Printf("%s:\t %s\n","arch", go_struct.Arch)
fmt.Printf("%s:\t %d\n","boards", go_struct.Boards)
fmt.Printf("%s:\t %d\n","boot time", go_struct.Boot_time)
fmt.Printf("%s:\t %s\n","cluster name", go_struct.Cluster_name)
fmt.Printf("%s:\t %d\n","cores", go_struct.Cores)
fmt.Printf("%s:\t %d\n","core spec cnt", go_struct.Core_spec_cnt)
fmt.Printf("%s:\t %d\n","cpu bind", go_struct.Cpu_bind)
fmt.Printf("%s:\t %d\n","cpu load", go_struct.Cpu_load)
fmt.Printf("%s:\t %d\n","free mem", go_struct.Free_mem)
fmt.Printf("%s:\t %d\n","cpus", go_struct.Cpus)
fmt.Printf("%s:\t %s\n","cpu spec list", go_struct.Cpu_spec_list)
fmt.Printf("%s:\t %s\n","features", go_struct.Features)
fmt.Printf("%s:\t %s\n","features act", go_struct.Features_act)
fmt.Printf("%s:\t %s\n","gres", go_struct.Gres)
fmt.Printf("%s:\t %s\n","gres drain", go_struct.Gres_drain)
fmt.Printf("%s:\t %s\n","gres used", go_struct.Gres_used)
fmt.Printf("%s:\t %s\n","mcs label", go_struct.Mcs_label)
fmt.Printf("%s:\t %d\n","mem spec limit", go_struct.Mem_spec_limit)
fmt.Printf("%s:\t %s\n","name", go_struct.Name)
fmt.Printf("%s:\t %d\n","next state", go_struct.Next_state)
fmt.Printf("%s:\t %s\n","node addr", go_struct.Node_addr)
fmt.Printf("%s:\t %s\n","node hostname", go_struct.Node_hostname)
fmt.Printf("%s:\t %d\n","node state", go_struct.Node_state)
fmt.Printf("%s:\t %s\n","os", go_struct.Os)
fmt.Printf("%s:\t %d\n","owner", go_struct.Owner)
fmt.Printf("%s:\t %s\n","partitions", go_struct.Partitions)
fmt.Printf("%s:\t %d\n","port", go_struct.Port)
fmt.Printf("%s:\t %d\n","real memory", go_struct.Real_memory)
fmt.Printf("%s:\t %s\n","reason", go_struct.Reason)
fmt.Printf("%s:\t %d\n","reason time", go_struct.Reason_time)
fmt.Printf("%s:\t %d\n","reason uid", go_struct.Reason_uid)
fmt.Printf("%s:\t %d\n","slurmd start time", go_struct.Slurmd_start_time)
fmt.Printf("%s:\t %d\n","sockets", go_struct.Sockets)
fmt.Printf("%s:\t %d\n","threads", go_struct.Threads)
fmt.Printf("%s:\t %d\n","tmp disk", go_struct.Tmp_disk)
fmt.Printf("%s:\t %d\n","weight", go_struct.Weight)
fmt.Printf("%s:\t %s\n","tres fmt str", go_struct.Tres_fmt_str)
fmt.Printf("%s:\t %s\n","version", go_struct.Version)
}
type Node_info_msg struct {
Last_update int64;
Record_count uint32;
Error_code uint32;
Node_list []Node_info;
}
func Get_all_nodes() Node_info_msg {
var go_node_buffer Node_info_msg
c_node_buffer := C.get_node_info()
if c_node_buffer == nil {
go_node_buffer.Last_update = int64(0)
go_node_buffer.Record_count = uint32(0)
go_node_buffer.Error_code = uint32(C.slurm_get_errno())
return go_node_buffer
}
go_node_buffer.Last_update = int64(c_node_buffer.last_update)
go_node_buffer.Record_count = uint32(c_node_buffer.record_count)
go_node_buffer.Node_list =make([]Node_info,c_node_buffer.record_count, c_node_buffer.record_count)
for i:=uint32(0); i<go_node_buffer.Record_count; i++ {
node := C.node_from_list(c_node_buffer, C.int(i))
go_node := Node_info_convert_c_to_go(node)
go_node_buffer.Node_list[i]=go_node
}
C.slurm_free_node_info_msg (c_node_buffer);
return go_node_buffer
}
func Get_node_info (name string) Node_info_msg {
var go_node_buffer Node_info_msg
c_name := C.CString(name)
defer C.free(unsafe.Pointer(c_name))
c_node_buffer := C.get_single_node_info(c_name)
if c_node_buffer == nil {
go_node_buffer.Last_update = int64(0)
go_node_buffer.Record_count = uint32(0)
go_node_buffer.Error_code = uint32(C.slurm_get_errno())
return go_node_buffer;
}
go_node_buffer.Last_update = int64(c_node_buffer.last_update)
go_node_buffer.Record_count = uint32(c_node_buffer.record_count)
go_node_buffer.Node_list =make([]Node_info,c_node_buffer.record_count, c_node_buffer.record_count)
for i:=uint32(0); i<go_node_buffer.Record_count; i++ {
node := C.node_from_list(c_node_buffer, C.int(i))
go_node := Node_info_convert_c_to_go(node)
go_node_buffer.Node_list[i]=go_node
}
C.slurm_free_node_info_msg (c_node_buffer);
return go_node_buffer
}

View File

@ -0,0 +1,219 @@
package partition_info
/*
#cgo LDFLAGS: -lslurm
#include<stdlib.h>
#include<slurm/slurm.h>
uint8_t uint8_ptr(uint8_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
int8_t int8_ptr(int8_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
uint16_t uint16_ptr(uint16_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
int16_t int16_ptr(int16_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
uint32_t uint32_ptr(uint32_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
int32_t int32_ptr(int32_t* pointer, int off) {
if (NULL == pointer) {
return -1;}
pointer+=off;
return *pointer;
}
uint64_t uint64_ptr(uint64_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
int64_t int64_ptr(int64_t* pointer) {
if (NULL == pointer) {
return -1;}
return *pointer;
}
struct partition_info_msg *get_partition_info(){
struct partition_info_msg* partition_buffer;
if( slurm_load_partitions ((time_t) NULL,
&partition_buffer, SHOW_ALL))
return NULL;
return partition_buffer;
}
struct partition_info* partition_from_list(struct partition_info_msg *list, int i){
return &list->partition_array[i];
}
void free_partition_buffer(void* buffer){
slurm_free_partition_info_msg ((struct partition_info_msg*)buffer);
}
int find_node_inx(int32_t* node){
int ret = 0;
while(*node != -1) { node++; ret++;};
return ret;
}
*/
import "C"
import "fmt"
type Partition_info struct {
Allow_alloc_nodes string;
Allow_accounts string;
Allow_groups string;
Allow_qos string;
Alternate string;
Billing_weights_str string;
Cluster_name string;
Cr_type uint16;
Cpu_bind uint32;
Def_mem_per_cpu uint64;
Default_time uint32;
Deny_accounts string;
Deny_qos string;
Flags uint16;
Grace_time uint32;
Job_defaults_str string;
Max_cpus_per_node uint32;
Max_mem_per_cpu uint64;
Max_nodes uint32;
Max_share uint16;
Max_time uint32;
Min_nodes uint32;
Name string;
Node_inx []int32;
Nodes string;
Over_time_limit uint16;
Preempt_mode uint16;
Priority_job_factor uint16;
Priority_tier uint16;
Qos_char string;
State_up uint16;
Total_cpus uint32;
Total_nodes uint32;
Tres_fmt_str string;
}
func Partition_info_convert_c_to_go(c_struct *C.struct_partition_info) Partition_info{
var go_struct Partition_info
go_struct.Allow_alloc_nodes = C.GoString(c_struct.allow_alloc_nodes)
go_struct.Allow_accounts = C.GoString(c_struct.allow_accounts)
go_struct.Allow_groups = C.GoString(c_struct.allow_groups)
go_struct.Allow_qos = C.GoString(c_struct.allow_qos)
go_struct.Alternate = C.GoString(c_struct.alternate)
go_struct.Billing_weights_str = C.GoString(c_struct.billing_weights_str)
go_struct.Cluster_name = C.GoString(c_struct.cluster_name)
go_struct.Cr_type = uint16(c_struct.cr_type)
go_struct.Cpu_bind = uint32(c_struct.cpu_bind)
go_struct.Def_mem_per_cpu = uint64(c_struct.def_mem_per_cpu)
go_struct.Default_time = uint32(c_struct.default_time)
go_struct.Deny_accounts = C.GoString(c_struct.deny_accounts)
go_struct.Deny_qos = C.GoString(c_struct.deny_qos)
go_struct.Flags = uint16(c_struct.flags)
go_struct.Grace_time = uint32(c_struct.grace_time)
go_struct.Job_defaults_str = C.GoString(c_struct.job_defaults_str)
go_struct.Max_cpus_per_node = uint32(c_struct.max_cpus_per_node)
go_struct.Max_mem_per_cpu = uint64(c_struct.max_mem_per_cpu)
go_struct.Max_nodes = uint32(c_struct.max_nodes)
go_struct.Max_share = uint16(c_struct.max_share)
go_struct.Max_time = uint32(c_struct.max_time)
go_struct.Min_nodes = uint32(c_struct.min_nodes)
go_struct.Name = C.GoString(c_struct.name)
t := C.find_node_inx(c_struct.node_inx)
fmt.Printf("%d", t)
go_struct.Node_inx = make([]int32, t,t)
for i:=int32(0); i<int32(t) ; i++{
go_struct.Node_inx[i] = int32(C.int32_ptr(c_struct.node_inx,C.int(i)))
}
go_struct.Nodes = C.GoString(c_struct.nodes)
go_struct.Over_time_limit = uint16(c_struct.over_time_limit)
go_struct.Preempt_mode = uint16(c_struct.preempt_mode)
go_struct.Priority_job_factor = uint16(c_struct.priority_job_factor)
go_struct.Priority_tier = uint16(c_struct.priority_tier)
go_struct.Qos_char = C.GoString(c_struct.qos_char)
go_struct.State_up = uint16(c_struct.state_up)
go_struct.Total_cpus = uint32(c_struct.total_cpus)
go_struct.Total_nodes = uint32(c_struct.total_nodes)
go_struct.Tres_fmt_str = C.GoString(c_struct.tres_fmt_str)
return go_struct
}
func Print_Partition_info(go_struct Partition_info){
fmt.Printf("%s:\t %s\n","allow alloc nodes", go_struct.Allow_alloc_nodes)
fmt.Printf("%s:\t %s\n","allow accounts", go_struct.Allow_accounts)
fmt.Printf("%s:\t %s\n","allow groups", go_struct.Allow_groups)
fmt.Printf("%s:\t %s\n","allow qos", go_struct.Allow_qos)
fmt.Printf("%s:\t %s\n","alternate", go_struct.Alternate)
fmt.Printf("%s:\t %s\n","billing weights str", go_struct.Billing_weights_str)
fmt.Printf("%s:\t %s\n","cluster name", go_struct.Cluster_name)
fmt.Printf("%s:\t %d\n","cr type", go_struct.Cr_type)
fmt.Printf("%s:\t %d\n","cpu bind", go_struct.Cpu_bind)
fmt.Printf("%s:\t %d\n","def mem per cpu", go_struct.Def_mem_per_cpu)
fmt.Printf("%s:\t %d\n","default time", go_struct.Default_time)
fmt.Printf("%s:\t %s\n","deny accounts", go_struct.Deny_accounts)
fmt.Printf("%s:\t %s\n","deny qos", go_struct.Deny_qos)
fmt.Printf("%s:\t %d\n","flags", go_struct.Flags)
fmt.Printf("%s:\t %d\n","grace time", go_struct.Grace_time)
fmt.Printf("%s:\t %s\n","job defaults str", go_struct.Job_defaults_str)
fmt.Printf("%s:\t %d\n","max cpus per node", go_struct.Max_cpus_per_node)
fmt.Printf("%s:\t %d\n","max mem per cpu", go_struct.Max_mem_per_cpu)
fmt.Printf("%s:\t %d\n","max nodes", go_struct.Max_nodes)
fmt.Printf("%s:\t %d\n","max share", go_struct.Max_share)
fmt.Printf("%s:\t %d\n","max time", go_struct.Max_time)
fmt.Printf("%s:\t %d\n","min nodes", go_struct.Min_nodes)
fmt.Printf("%s:\t %s\n","name", go_struct.Name)
fmt.Printf("%s:\t %d\n","node inx", go_struct.Node_inx)
fmt.Printf("%s:\t %s\n","nodes", go_struct.Nodes)
fmt.Printf("%s:\t %d\n","over time limit", go_struct.Over_time_limit)
fmt.Printf("%s:\t %d\n","preempt mode", go_struct.Preempt_mode)
fmt.Printf("%s:\t %d\n","priority job factor", go_struct.Priority_job_factor)
fmt.Printf("%s:\t %d\n","priority tier", go_struct.Priority_tier)
fmt.Printf("%s:\t %s\n","qos char", go_struct.Qos_char)
fmt.Printf("%s:\t %d\n","state up", go_struct.State_up)
fmt.Printf("%s:\t %d\n","total cpus", go_struct.Total_cpus)
fmt.Printf("%s:\t %d\n","total nodes", go_struct.Total_nodes)
fmt.Printf("%s:\t %s\n","tres fmt str", go_struct.Tres_fmt_str)
}
type Partition_info_msg struct {
Last_update int64;
Record_count uint32;
Partition_list []Partition_info;
}
func Get_partitions() Partition_info_msg {
var go_partition_buffer Partition_info_msg
c_partition_buffer := C.get_partition_info()
if c_partition_buffer == nil{
go_partition_buffer.Last_update = int64(0)
go_partition_buffer.Record_count = uint32(0)
return go_partition_buffer;
}
go_partition_buffer.Last_update = int64(c_partition_buffer.last_update)
go_partition_buffer.Record_count = uint32(c_partition_buffer.record_count)
go_partition_buffer.Partition_list =make([]Partition_info,c_partition_buffer.record_count, c_partition_buffer.record_count)
for i:=uint32(0); i<go_partition_buffer.Record_count; i++ {
partition := C.partition_from_list(c_partition_buffer, C.int(i))
go_partition := Partition_info_convert_c_to_go(partition)
go_partition_buffer.Partition_list[i]=go_partition
}
C.slurm_free_partition_info_msg (c_partition_buffer);
return go_partition_buffer
}

File diff suppressed because it is too large

View File

@ -0,0 +1,20 @@
package main
import "slurm/extra"
import "fmt"
import "os"
import "strconv"
func main(){
if len(os.Args)<2 {
fmt.Printf("Please specify Job ID\n")
return
}
id,_ := strconv.Atoi(os.Args[1])
fmt.Printf("try to cancel %d\n", id)
err:= extra.Cancel_job(uint32( id))
if(err!= nil){
fmt.Printf(err.Error())
}
}

View File

@ -0,0 +1,25 @@
package main
import "slurm/extra"
import "fmt"
import "os"
import "strconv"
func main(){
if len(os.Args)<2 {
fmt.Printf("Please specify Job ID\n")
return
}
id,_ := strconv.Atoi(os.Args[1])
jobs, err := extra.Get_job_info_accounting(uint32(id))
if err!= nil {
fmt.Printf(err.Error())
return
}
fmt.Printf("JobId\tuser\taccount\tstate\t\tJobName\n")
for i := range(jobs) {
fmt.Printf("%d\t%s\t%s\t%s\t%s\n", jobs[i].JobId, jobs[i].User, jobs[i].Account, jobs[i].State, jobs[i].JobName)
}
}

View File

@ -0,0 +1,19 @@
package main
import "slurm/jobinfo"
import "fmt"
func main(){
job_list := job_info.Get_all_jobs()
fmt.Printf("Found %d jobs \n", job_list.Record_count)
/* a little bit nicer */
fmt.Printf("Id\tName\t\tPartition\tUser\tRuntime\tStatus\t\t(Reason)\tNodes\tPriority\n")
fmt.Printf("________________________________________________________________________________________________\n")
for i := range job_list.Job_list {
job := job_list.Job_list[i]
fmt.Printf("%d\t%s\t%s\t%s %s\t%s\t%s\t%s\t%d\n" ,
job.Job_id, job.Name, job.Partition, job.User_name,job_info.Get_job_runtime(job).String(), job.Job_stateS ,
job_info.Reason_to_string(job.State_reason), job.Nodes,job.Priority)
}
}

View File

@ -0,0 +1,20 @@
package main
import "slurm/nodeinfo"
import "fmt"
func main(){
node_list := node_info.Get_all_nodes()
fmt.Printf("Found %d nodes \n", node_list.Record_count)
/* a little bit nicer*/
fmt.Printf("name\t State\t\t\t Reason\t\t Tres\n")
fmt.Printf("________________________________________\n")
for i := range node_list.Node_list {
node := node_list.Node_list[i]
fmt.Printf("%s\t %s\t %s\t %s\n", node.Node_hostname, node_info.State_to_string(node.Node_state), node.Reason, node.Tres_fmt_str)
}
}

View File

@ -0,0 +1,15 @@
package main
import "slurm"
import "fmt"
func main(){
version := int(0)
var config slurm.Ctl_conf
version = slurm.Version()
fmt.Printf("Version is %s\n", slurm.VersionString(version));
config = slurm.GetConfig()
slurm.Print_Ctl_conf(config)
}

View File

@ -0,0 +1,41 @@
package main
import "slurm/jobinfo"
import "slurm"
import "fmt"
import "os"
import "strconv"
func main(){
if len(os.Args)<2 {
fmt.Printf("Please specify Job ID\n")
return
}
id,_ := strconv.Atoi(os.Args[1])
job_list := job_info.Get_job(uint32(id))
if job_list.Error_code != 0 {
msg := slurm.GetErrorString(job_list.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
for i := range job_list.Job_list {
job_info.Print_Job_info(job_list.Job_list[i])
}
fmt.Printf("Id\tName\t\tPartition\tUser\tRuntime\tStatus\t\t(Reason)\tNodes\tPriority\n")
fmt.Printf("________________________________________________________________________________________________\n")
for i := range job_list.Job_list {
job := job_list.Job_list[i]
fmt.Printf("%d\t%s\t%s\t%s %s\t%s\t%s\t%s\t%d\n" ,
job.Job_id, job.Name, job.Partition, job.User_name,job_info.Get_job_runtime(job).String(), job.Job_stateS,
job_info.Reason_to_string(job.State_reason), job.Nodes,job.Priority)
}
end_time :=job_info.Get_job_endtime(uint32(id))
fmt.Printf("End-Time: %s\n", end_time)
}

View File

@ -0,0 +1,32 @@
package main
import "slurm/nodeinfo"
import "slurm"
import "fmt"
import "os"
func main(){
if len(os.Args)<2 {
fmt.Printf("Please specify node name\n")
return
}
name:= os.Args[1]
node_list := node_info.Get_node_info(name)
if(node_list.Error_code !=0) {
msg := slurm.GetErrorString(node_list.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
fmt.Printf("Found %d nodes \n", node_list.Record_count)
/* a little bit nicer*/
fmt.Printf("name\t State\t\t\t Reason\t\t Tres\n")
fmt.Printf("________________________________________\n")
for i := range node_list.Node_list {
node := node_list.Node_list[i]
fmt.Printf("%s\t %s\t %s\t %s\n", node.Node_hostname, node_info.State_to_string(node.Node_state), node.Reason, node.Tres_fmt_str)
}
}

View File

@ -0,0 +1,20 @@
package main
import "slurm/partitioninfo"
import "fmt"
func main(){
partition_list := partition_info.Get_partitions()
fmt.Printf("Found %d partitions \n", partition_list.Record_count)
/* a little bit nicer */
fmt.Printf("Name\t Nodes\t\t\t Max_time(min)\t\t Tres\n")
fmt.Printf("________________________________________\n")
for i := range partition_list.Partition_list {
partition := partition_list.Partition_list[i]
fmt.Printf("%s\t %s\t %d\t %d\n", partition.Name, partition.Nodes, partition.Max_time, partition.Node_inx )
}
}

View File

@ -0,0 +1,35 @@
package main
import "slurm/jobinfo"
import "slurm"
import "fmt"
import "os"
func main(){
if len(os.Args)<2 {
fmt.Printf("Please specify username\n")
return
}
name := os.Args[1]
job_list := job_info.Get_user_jobs(name)
if job_list.Error_code != 0 {
msg := slurm.GetErrorString(job_list.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
fmt.Printf("Id\tName\t\tPartition\tUser\tRuntime\tStatus\t\t(Reason)\tNodes\tPriority\n")
fmt.Printf("________________________________________________________________________________________________\n")
for i := range job_list.Job_list {
job := job_list.Job_list[i]
fmt.Printf("%d\t%s\t%s\t%s %s\t%s\t%s\t%s\t%d\n" ,
job.Job_id, job.Name, job.Partition, job.User_name,job_info.Get_job_runtime(job).String(), job.Job_stateS ,
job_info.Reason_to_string(job.State_reason), job.Nodes,job.Priority)
}
}

View File

@ -0,0 +1,109 @@
# Submission of jobs
This folder shows, with a few more examples, how jobs can be submitted in Slurm. Some examples use containers.
Attention: The parameters for job names and partitions probably have to be adjusted!
# Simple Jobs
## submit_job.go
In this example, a simple bash job is submitted. The partition used is *long* (adjust if necessary).
```
job_desc.Partition="long"
```
The job sets two environment variables and executes
```
hostname
env | grep SLURM
```
on a single node of the cluster (single-task job).
The application does not wait until the job is completed, but returns directly.
The standard output is written to `out-<jobid>.txt`, the standard error to `err-<jobid>.txt`:
```
job_desc.Std_out = ("./out-%j.txt")
job_desc.Std_err = ("./err-%j.txt")
```
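For reference, a condensed sketch of roughly what `submit_job.go` does with the Go bindings of this repository (the `submit_job` package from `slurm/submitjob`; the partition name and time limit are assumptions and have to be adjusted):
```
package main

import (
	"fmt"
	"os"
	"os/user"
	"strconv"

	"slurm"
	"slurm/submitjob"
)

func main() {
	// Describe the job: a small bash script, one node, two environment variables.
	job_desc := submit_job.Job_descriptor{}
	job_desc.Script = "#! /bin/bash\n hostname \n env | grep SLURM"
	job_desc.Name = "test_job"
	job_desc.Partition = "long" // adjust to your cluster
	job_desc.Time_limit = uint32(2)
	job_desc.Min_nodes = uint32(1)
	job_desc.Std_out = "./out-%j.txt"
	job_desc.Std_err = "./err-%j.txt"
	job_desc.Environment = []string{"SLURM_GO_JOB=TRUE", "SLURM_CONTAINER_JOB=FALSE"}

	// Run as the current user from the current working directory.
	u, _ := user.Current()
	uid, _ := strconv.Atoi(u.Uid)
	gid, _ := strconv.Atoi(u.Gid)
	job_desc.User_id = uint32(uid)
	job_desc.Group_id = uint32(gid)
	job_desc.Work_dir, _ = os.Getwd()

	// Submit_job returns immediately; the program does not wait for the job to finish.
	answer := submit_job.Submit_job(&job_desc)
	if answer.Error_code != 0 {
		fmt.Printf("Error: %s\n", slurm.GetErrorString(answer.Error_code))
		return
	}
	fmt.Printf("Submitted Job %d\n", answer.Job_id)
}
```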
## update_job.go
This example allows updating the QOS and the partition of a job. This can help to move the job to another queue with a different partition.
Note to users: In theory, the API also allows updating the number of nodes and the tasks per node. However, since this is only allowed for root or a Slurm admin, we do not include an example here.
Syntax:
```
./update_job JobId qos partition
```
(Note: this requires that the job with the given JobId has already been submitted and is in a pending state.)
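A condensed sketch of the corresponding program (the `Update_job_options` struct and `submit_job.Update_job` call as used in this repository):
```
package main

import (
	"fmt"
	"os"
	"strconv"

	"slurm"
	"slurm/submitjob"
)

func main() {
	if len(os.Args) < 4 {
		fmt.Printf("Syntax: update_job JobId qos partition\n")
		return
	}
	id, err := strconv.Atoi(os.Args[1])
	if err != nil {
		fmt.Printf("Invalid job id %s\n", os.Args[1])
		return
	}

	// Only QOS and partition are changed; the job should still be pending.
	var ops submit_job.Update_job_options
	ops.Qos = os.Args[2]
	ops.Partition = os.Args[3]

	if rc := submit_job.Update_job(ops, uint32(id)); rc != uint32(0) {
		fmt.Printf("error %s\n", slurm.GetErrorString(rc))
	}
}
```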
# Container jobs
The following examples all submit jobs that start Singularity containers.
If the containers do not exist, they are created. However, problems can arise if the user does not have sudo permissions.
## The containers
The first container is an MPI container. It is used by `submit_mpi_containier.go` and `submit_mpi_and_update.go`. The definition is stored in `mpi_container.def`.
It can also be created with the command
```
sudo singularity build mpi_container.img mpi_container.def
```
The program mpi_pingpong (source code enclosed: `mpi_pingpong.c`) is built into the container. It performs a ping-pong test between two processes.
This container uses the hybrid model, which assumes that MPI is installed on the cluster (to start the job) and also installs it inside the container. This works with Open MPI.
The second container is an OpenMP container, including a sample OpenMP program openmp_example (source code: `openmp_example.c`).
It can also be created with the command:
```
sudo singularity build openmp_container.img openmp_container.def
```
This container is used by `submit_openmp_container.go`.
## submit_mpi_containier.go
Submits an MPI container job to the cluster. It runs two processes on two nodes:
```
job_desc.Min_nodes =uint32(2)
job_desc.Num_tasks = uint32(2)
```
The application blocks until the job is completed. The standard output is written to `<jobid>-out.txt`, the standard error to `<jobid>-err.txt`:
```
job_desc.Std_out = ("./%j-out.txt")
job_desc.Std_err = ("./%j-err.txt")
```
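The blocking behaviour is implemented by polling the job state through the `job_info` bindings (`slurm/jobinfo`). A condensed sketch of that loop, written here as a helper function (it additionally needs the `fmt`, `time` and `slurm` imports; the two-second poll interval matches the examples):
```
// waitForJob blocks until the job is neither pending nor running anymore.
func waitForJob(jobId uint32) {
	for {
		job_list := job_info.Get_job(jobId)
		if job_list.Error_code != 0 {
			fmt.Printf("Error: %s\n", slurm.GetErrorString(job_list.Error_code))
			return
		}
		state := job_list.Job_list[0].Job_stateS
		fmt.Printf("job %d is %s\n", jobId, state)
		if state != "Pending" && state != "Running" {
			return
		}
		time.Sleep(2 * time.Second)
	}
}
```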
## submit_omp_container.go
Submits two OpenMP jobs to the cluster and waits until they are completed.
Both jobs allocate *one process* for the job, but *two CPUs per task/process* (for multi-threading).
```
job_desc.Num_tasks = uint32(1)
job_desc.Cpus_per_task = uint16(2)
```
The first job reads the environment variable `SLURM_JOB_CPUS_PER_NODE` and sets the number of OpenMP threads to exactly the number of CPUs that are available per task/process.
```
job_desc.Script+= "export OMP_NUM_THREADS=$SLURM_JOB_CPUS_PER_NODE\n"
```
The second job sets the number of threads to 4 (which oversubscribes, since more threads are started than CPUs were allocated) and executes the same job.
```
job_desc.Script+= "export OMP_NUM_THREADS=4\n"
```
The program waits until both jobs are completed. The results are written to the two output files, similar to `submit_mpi_container.go`.
## submit_mpi_and_update.go
This application does the same as `submit_mpi_container.go`, but if the submitted job is still pending, it is moved to another QOS and partition:
```
ops.Qos = "shortjobs"
ops.Partition = "short"
```
This situation can, for example, be created by first submitting other, longer jobs in the background (depending on the partition size) and then starting this application:
```
./submit_mpi_containier & ./submit_mpi_containier & ./submit_mpi_and_update
```
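Condensed, the decisive part of that program is a check for the *Pending* state followed by the same `Update_job` call as above (a sketch; `answer` is the result of `Submit_job` and `job_list` the result of `job_info.Get_job`):
```
job := job_list.Job_list[0]
if job.Job_stateS == "Pending" {
	fmt.Printf("Move job %d to another partition\n", answer.Job_id)
	var ops submit_job.Update_job_options
	ops.Qos = "shortjobs"
	ops.Partition = "short"
	if rc := submit_job.Update_job(ops, uint32(answer.Job_id)); rc != uint32(0) {
		fmt.Printf("error %s\n", slurm.GetErrorString(rc))
	}
}
```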

View File

@ -0,0 +1,37 @@
Bootstrap: docker
From: ubuntu:latest
%files
mpi_pingpong.c /opt
%environment
export OMPI_DIR=/home0/opt/openmpi
export SINGULARITY_OMPI_DIR=$OMPI_DIR
export SINGULARITYENV_APPEND_PATH=$OMPI_DIR/bin
export SINGULARITYENV_APPEND_LD_LIBRARY_PATH=$OMPI_DIR/lib
%post
echo "Installing required packages..."
apt-get update && apt-get install -y wget git bash gcc gfortran g++ make file
echo "Installing Open MPI"
export OMPI_DIR=/home0/opt/openmpi
export OMPI_VERSION=4.0.3
export OMPI_URL="https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-$OMPI_VERSION.tar.bz2"
mkdir -p /tmp/ompi
mkdir -p /opt
chmod a+w /opt/
chmod a+r /opt/
ls -la /tmp/ompi
# Download
cd /tmp/ompi && wget -O openmpi-$OMPI_VERSION.tar.bz2 $OMPI_URL && tar -xjf openmpi-$OMPI_VERSION.tar.bz2
ls -la
# Compile and install
cd /tmp/ompi/openmpi-$OMPI_VERSION && ./configure --prefix=$OMPI_DIR && make install
# Set env variables so we can compile our application
export PATH=$OMPI_DIR/bin:$PATH
export LD_LIBRARY_PATH=$OMPI_DIR/lib:$LD_LIBRARY_PATH
export MANPATH=$OMPI_DIR/share/man:$MANPATH
# rm -r tmp/mpi
echo "Compiling the MPI application..."
cd /opt && mpicc -o mpi_pingpong mpi_pingpong.c

View File

@ -0,0 +1,65 @@
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define MAX_ITER 1000
int main (int argc, char **argv) {
int rc;
int size;
int myrank;
size_t max_send = 1<<22;
char *send_buf = (char*)malloc(sizeof(char)*max_send);
char *recv_buf = (char*)malloc(sizeof(char)*max_send);
size_t send_size;
clock_t start, end;
rc = MPI_Init (&argc, &argv);
if (rc != MPI_SUCCESS) {
fprintf (stderr, "MPI_Init() failed");
return EXIT_FAILURE;
}
rc = MPI_Comm_size (MPI_COMM_WORLD, &size);
if (rc != MPI_SUCCESS) {
fprintf (stderr, "MPI_Comm_size() failed");
goto exit_with_error;
}
if(size!= 2) {
fprintf(stderr, "This program requires exactly two processes\n");
}
rc = MPI_Comm_rank (MPI_COMM_WORLD, &myrank);
if (rc != MPI_SUCCESS) {
fprintf (stderr, "MPI_Comm_rank() failed");
goto exit_with_error;
}
if(myrank==0)
fprintf (stdout, "Size\t Time(ms)\n");
for(send_size=1 ; send_size<= max_send; send_size*=2){
for (int i = 0; i<MAX_ITER+2; i++) {
if(i == 2)
start = clock();
if(myrank == 0){
MPI_Send(send_buf, send_size, MPI_CHAR, 1, 0x4, MPI_COMM_WORLD);
MPI_Recv(recv_buf, send_size, MPI_CHAR, 1, 0x5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
else {
MPI_Recv(recv_buf, send_size, MPI_CHAR, 0, 0x4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Send(send_buf, send_size, MPI_CHAR, 0, 0x5, MPI_COMM_WORLD);
}
}
end= clock();
double time_taken = (double)(end-start)/CLOCKS_PER_SEC;
if(myrank == 0 )
fprintf(stdout, "%zu\t %f\n", send_size, time_taken);
}
MPI_Finalize();
return EXIT_SUCCESS;
exit_with_error:
MPI_Finalize();
return EXIT_FAILURE;
}

View File

@ -0,0 +1,18 @@
Bootstrap: docker
From: ubuntu:latest
%files
openmp_example.c /opt
%environment
export OMPI_DIR=/home0/opt/openmpi
export SINGULARITY_OMPI_DIR=$OMPI_DIR
export SINGULARITYENV_APPEND_PATH=$OMPI_DIR/bin
export SINGULARITYENV_APPEND_LD_LIBRARY_PATH=$OMPI_DIR/lib
%post
echo "Installing required packages..."
apt-get update && apt-get install -y wget git bash gcc gfortran g++ make file
echo "Compiling the OpenMP application..."
cd /opt && gcc -o openmp_example -fopenmp openmp_example.c

View File

@ -0,0 +1,14 @@
#include <stdio.h>
#include <omp.h>
int main() {
#pragma omp parallel
{
int id = omp_get_thread_num();
int data = id;
int total = omp_get_num_threads();
printf("Greetings from thread %d out of %d with Data %d\n", id, total, data);
}
printf("parallel region ends.\n");
return 0;
}

View File

@ -0,0 +1,36 @@
package main
import "slurm/submitjob"
import "slurm"
import "os/user"
import "os"
import "strconv"
import "fmt"
func main(){
job_desc := submit_job.Job_descriptor{}
job_desc.Script = "#! /bin/bash\n hostname \n env | grep SLURM "
dir, _ := os.Getwd()
user, _:= user.Current()
userid , _ := strconv.Atoi(user.Uid)
job_desc.User_id= uint32(userid)
groupid , _ := strconv.Atoi(user.Gid)
job_desc.Group_id= uint32(groupid)
job_desc.Name = "test_job"
job_desc.Partition="long"
job_desc.Time_limit = uint32(2)
job_desc.Min_nodes =uint32(1)
job_desc.Std_out = ("./out-%j.txt")
job_desc.Std_err = ("./err-%j.txt")
job_desc.Work_dir = dir
job_desc.Environment = []string{"SLURM_GO_JOB=TRUE", "SLURM_CONTAINER_JOB=FALSE"}
answer := submit_job.Submit_job(&job_desc)
if(answer.Error_code != 0) {
msg := slurm.GetErrorString(answer.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
fmt.Printf("Submitted Job %d\n", answer.Job_id)
}

View File

@ -0,0 +1,127 @@
package main
import "slurm/submitjob"
import "slurm"
import "os"
import "strconv"
import "fmt"
import "os/exec"
import "path/filepath"
import "slurm/jobinfo"
import "time"
import "os/user"
func fileExists(filename string) bool {
info, err := os.Stat(filename)
if os.IsNotExist(err) {
return false
}
return !info.IsDir()
}
func build_container(file_name,container_name string){
cmd := exec.Command("sudo", "/usr/local/bin/singularity", "build",container_name, file_name)
fmt.Print("Now build new container")
fmt.Printf("%s\n", cmd.String())
stdoutStderr, err := cmd.CombinedOutput()
if err != nil {
fmt.Printf("error in creating container %s \n", err);
// return
}
fmt.Printf("%s\n", stdoutStderr)
}
func main(){
job_desc := submit_job.Job_descriptor{}
dir, _ := os.Getwd()
container := filepath.Join(dir, "mpi_container.img")
definition := filepath.Join(dir, "mpi_container.def")
if !fileExists(container){
build_container(definition,container)
}
if !fileExists(container){
return
}
/* use Cmd to create our script */
job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n srun hostname \n"
cmd := exec.Command( "/home0/opt/openmpi/bin/mpirun", "-mca btl_tcp_if_include eth1", "/usr/local/bin/singularity", "exec",container, "/opt/mpi_pingpong" )
job_desc.Script+= cmd.String()
fmt.Printf("cmd %s\n", job_desc.Script)
user, _:= user.Current()
userid , _ := strconv.Atoi(user.Uid)
job_desc.User_id= uint32(userid)
groupid , _ := strconv.Atoi(user.Gid)
job_desc.Group_id= uint32(groupid)
job_desc.Name = "flex_mpi_job"
job_desc.Partition="long"
job_desc.Time_limit = uint32(60)
job_desc.Ntasks_per_node = uint16(1)
job_desc.Num_tasks = uint32(2)
job_desc.Std_out = ("./%j-out.txt")
job_desc.Std_err = ("./%j-err.txt")
job_desc.Work_dir = dir
time.Sleep(3 * time.Second)
answer := submit_job.Submit_job(&job_desc)
if(answer.Error_code != 0) {
msg := slurm.GetErrorString(answer.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
fmt.Printf("Submitted Job %d\n", answer.Job_id)
time.Sleep(5 * time.Second)
job_list := job_info.Get_job(answer.Job_id)
if job_list.Error_code != 0 {
msg := slurm.GetErrorString(job_list.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
job := job_list.Job_list[0]
fmt.Printf("job %d is %s\n", answer.Job_id, job.Job_stateS)
state := job.Job_stateS
if state == "Pending" {
fmt.Printf("Move job %d to another partition \n", answer.Job_id)
var ops submit_job.Update_job_options
ops.Qos = "shortjobs"
ops.Partition = "short"
err2 := submit_job.Update_job(ops, uint32(answer.Job_id))
if err2!= uint32(0) {
fmt.Printf("error %s \n", slurm.GetErrorString(err2))
}
}
for state == "Pending" || state == "Running" {
time.Sleep(2 * time.Second)
job_list = job_info.Get_job(answer.Job_id)
if job_list.Error_code != 0 {
msg := slurm.GetErrorString(job_list.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
job = job_list.Job_list[0]
state = job.Job_stateS
fmt.Printf("job %d is %s\n",answer.Job_id, job.Job_stateS)
}
fmt.Printf("Total runtime Job %d %s\n",job.Job_id, job_info.Get_job_runtime(job).String() )
}

View File

@ -0,0 +1,111 @@
package main
import "slurm/submitjob"
import "slurm"
import "os/user"
import "os"
import "strconv"
import "fmt"
import "os/exec"
import "path/filepath"
import "slurm/jobinfo"
import "time"
func fileExists(filename string) bool {
info, err := os.Stat(filename)
if os.IsNotExist(err) {
return false
}
return !info.IsDir()
}
func build_container(file_name,container_name string){
cmd := exec.Command("sudo","/usr/local/bin/singularity", "build",container_name, file_name)
fmt.Print("Now build new container")
fmt.Printf("%s\n", cmd.String())
stdoutStderr, err := cmd.CombinedOutput()
if err != nil {
fmt.Printf("error in creating container %s \n", err)
fmt.Printf("%s\n", stdoutStderr)
// return
}
fmt.Printf("%s\n", stdoutStderr)
}
func main(){
job_desc := submit_job.Job_descriptor{}
dir, _ := os.Getwd()
container := filepath.Join(dir, "mpi_container.img")
definition := filepath.Join(dir, "mpi_container.def")
if !fileExists(container){
build_container(definition,container)
}
if !fileExists(container){
return
}
/* use Cmd to create our script */
job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n hostname \n"
cmd := exec.Command( "/home0/opt/openmpi/bin/mpirun", "-mca btl_tcp_if_include eth1", "/usr/local/bin/singularity", "exec",container, "/opt/mpi_pingpong" )
job_desc.Script+= cmd.String()
fmt.Printf("cmd %s\n", job_desc.Script)
user, _:= user.Current()
userid , _ := strconv.Atoi(user.Uid)
job_desc.User_id= uint32(userid)
groupid , _ := strconv.Atoi(user.Gid)
job_desc.Group_id= uint32(groupid)
job_desc.Name = "mpi_job"
job_desc.Partition="long"
job_desc.Time_limit = uint32(60)
job_desc.Min_nodes =uint32(2)
job_desc.Num_tasks = uint32(2)
job_desc.Std_out = ("./%j-out.txt")
job_desc.Std_err = ("./%j-err.txt")
job_desc.Work_dir = dir
answer := submit_job.Submit_job(&job_desc)
if(answer.Error_code != 0) {
msg := slurm.GetErrorString(answer.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
fmt.Printf("Submitted Job %d\n", answer.Job_id)
job_list := job_info.Get_job(answer.Job_id)
if job_list.Error_code != 0 {
msg := slurm.GetErrorString(job_list.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
job := job_list.Job_list[0]
fmt.Printf("job %d is %s\n",answer.Job_id, job.Job_stateS)
state := job.Job_stateS
for state == "Pending" || state == "Running" {
time.Sleep(2 * time.Second)
job_list = job_info.Get_job(answer.Job_id)
if job_list.Error_code != 0 {
msg := slurm.GetErrorString(job_list.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
job = job_list.Job_list[0]
state = job.Job_stateS
fmt.Printf("job %d is %s\n",answer.Job_id, job.Job_stateS)
}
fmt.Printf("Total runtime Job %d: %s\n",job.Job_id, job_info.Get_job_runtime(job).String() )
}

View File

@ -0,0 +1,162 @@
package main
import "slurm/submitjob"
import "slurm"
import "os/user"
import "os"
import "strconv"
import "fmt"
import "os/exec"
import "path/filepath"
import "slurm/jobinfo"
import "time"
func fileExists(filename string) bool {
info, err := os.Stat(filename)
if os.IsNotExist(err) {
return false
}
return !info.IsDir()
}
func build_container(file_name,container_name string){
cmd := exec.Command("sudo", "/usr/local/bin/singularity", "build",container_name, file_name)
fmt.Print("Now build new container")
fmt.Printf("%s\n", cmd.String())
stdoutStderr, err := cmd.CombinedOutput()
if err != nil {
fmt.Printf("error in creating container %s \n", err)
fmt.Printf("%s\n", stdoutStderr)
// return
}
fmt.Printf("%s\n", stdoutStderr)
}
func main(){
job_desc := submit_job.Job_descriptor{}
dir, _ := os.Getwd()
container := filepath.Join(dir, "openmp_container.img")
definition := filepath.Join(dir, "openmp_container.def")
if !fileExists(container){
build_container(definition,container)
}
if !fileExists(container){
return
}
/* use Cmd to create our script */
job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n hostname \n"
job_desc.Script+= "export OMP_NUM_THREADS=$SLURM_JOB_CPUS_PER_NODE\n"
cmd := exec.Command( "/usr/local/bin/singularity", "exec",container, "/opt/openmp_example" )
job_desc.Script+= cmd.String()
fmt.Printf("cmd %s\n", job_desc.Script)
user, _:= user.Current()
userid , _ := strconv.Atoi(user.Uid)
job_desc.User_id= uint32(userid)
groupid , _ := strconv.Atoi(user.Gid)
job_desc.Group_id= uint32(groupid)
job_desc.Name = "test_job"
job_desc.Partition="long"
job_desc.Time_limit = uint32(60)
job_desc.Min_nodes =uint32(1)
job_desc.Num_tasks = uint32(1)
job_desc.Cpus_per_task = uint16(2)
job_desc.Std_out = ("./%j-out.txt")
job_desc.Std_err = ("./%j-err.txt")
job_desc.Work_dir = dir
answer := submit_job.Submit_job(&job_desc)
if(answer.Error_code != 0) {
msg := slurm.GetErrorString(answer.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
fmt.Printf("Submitted Job %d\n", answer.Job_id)
/* Now we submit the same job again, but with some oversubscription */
job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n hostname \n"
job_desc.Script+= "export OMP_NUM_THREADS=4\n"
job_desc.Script+= cmd.String()
fmt.Printf("cmd %s\n", job_desc.Script)
answer2 := submit_job.Submit_job(&job_desc)
if(answer2.Error_code != 0) {
msg := slurm.GetErrorString(answer2.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
fmt.Printf("Submitted Job %d\n", answer2.Job_id)
job_list := job_info.Get_job(answer.Job_id)
if job_list.Error_code != 0 {
msg := slurm.GetErrorString(job_list.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
job := job_list.Job_list[0]
fmt.Printf("job is %s\n",job.Job_stateS)
state := job.Job_stateS
for state == "Pending" || state == "Running" {
time.Sleep(2 * time.Second)
job_list = job_info.Get_job(answer.Job_id)
if job_list.Error_code != 0 {
msg := slurm.GetErrorString(job_list.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
job = job_list.Job_list[0]
state = job.Job_stateS
fmt.Printf("job is %s\n",job.Job_stateS)
}
fmt.Printf("Total runtime first job %s\n",job_info.Get_job_runtime(job).String() )
/*wait for second job */
job_list = job_info.Get_job(answer2.Job_id)
if job_list.Error_code != 0 {
msg := slurm.GetErrorString(job_list.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
job = job_list.Job_list[0]
fmt.Printf("job is %s\n",job.Job_stateS)
state = job.Job_stateS
for state == "Pending" || state == "Running" {
time.Sleep(2 * time.Second)
job_list = job_info.Get_job(answer2.Job_id)
if job_list.Error_code != 0 {
msg := slurm.GetErrorString(job_list.Error_code)
fmt.Printf("Error: %s\n" ,msg)
return
}
job = job_list.Job_list[0]
state = job.Job_stateS
fmt.Printf("job is %s\n",job.Job_stateS)
}
fmt.Printf("Total runtime second job %s\n",job_info.Get_job_runtime(job).String() )
}

View File

@ -0,0 +1,28 @@
package main
import "slurm/submitjob"
import "slurm"
import "os"
import "strconv"
import "fmt"
func main(){
if len(os.Args)<4 {
fmt.Printf("Syntax: specify JobId, qos and partition\n")
return
}
var ops submit_job.Update_job_options
id,err := strconv.Atoi(os.Args[1])
if err != nil {
fmt.Printf("Invalid job id (no int) %s\n", os.Args[1] )
return
}
ops.Qos = os.Args[2]
ops.Partition = os.Args[3]
err2 := submit_job.Update_job(ops, uint32(id))
if err2!= uint32(0) {
fmt.Printf("error %s \n", slurm.GetErrorString(err2))
}
}