slurm init
This commit is contained in:
parent
1b0c2119cb
commit
8ae093d187
|
@ -0,0 +1,121 @@
|
||||||
|
/*These are some extra functions to work with slurm in go
|
||||||
|
** They are seperated, since they don't use the slurm-API
|
||||||
|
** but wrap arround the SLURM comand line tools */
|
||||||
|
package extra
|
||||||
|
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
|
"errors"
|
||||||
|
"path/filepath"
|
||||||
|
"slurm/jobinfo"
|
||||||
|
"slurm"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
var slurm_path string
|
||||||
|
|
||||||
|
|
||||||
|
func find_slurm_path () {
|
||||||
|
var err error
|
||||||
|
var path string
|
||||||
|
path=os.Getenv("SLURM_PATH")
|
||||||
|
if path == " "{
|
||||||
|
path, err = exec.LookPath("sinfo")
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("could not find slurm executables\n Either add slum-bins to your PATH or define SLURM_PATH\n")
|
||||||
|
} else {
|
||||||
|
slurm_path=strings.TrimSuffix(path, "bin/sinfo")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
test_path := filepath.Join(path, "bin/sinfo")
|
||||||
|
_, err := os.Stat(test_path)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
fmt.Printf("Slurm executable sinfo does no exist at %s\n", test_path)
|
||||||
|
} else {
|
||||||
|
slurm_path = path
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Cancel_job( JobId uint32) error{
|
||||||
|
find_slurm_path()
|
||||||
|
if slurm_path == "" {
|
||||||
|
return errors.New("Cannot find slurm executable")
|
||||||
|
}
|
||||||
|
job_list := job_info.Get_job(JobId)
|
||||||
|
if job_list.Error_code != 0 {
|
||||||
|
msg := slurm.GetErrorString(job_list.Error_code)
|
||||||
|
fmt.Printf(msg)
|
||||||
|
return errors.New(msg)
|
||||||
|
}
|
||||||
|
path := filepath.Join(slurm_path,"bin","scancel")
|
||||||
|
cmd := exec.Command(path, strconv.FormatInt(int64(JobId), 10))
|
||||||
|
fmt.Print(cmd.String())
|
||||||
|
out, err := cmd.CombinedOutput()
|
||||||
|
if err!= nil {
|
||||||
|
msg := string(out) + err.Error()
|
||||||
|
return errors.New(msg)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Acc_Job_info is one accounting record for a job as reported by the
// sacct command line tool (see parse_sacct_output / Get_job_info_accounting).
type Acc_Job_info struct {
	JobId   uint32 // numeric slurm job id
	User    string // submitting user name
	Account string // accounting/bank account the job was charged to
	State   string // job state string as printed by sacct
	JobName string // user-assigned job name
}
|
||||||
|
var sacct_format_string string
|
||||||
|
|
||||||
|
func parse_sacct_output(input string) []Acc_Job_info {
|
||||||
|
var job_list []Acc_Job_info
|
||||||
|
lines := strings.Split(string(input), "\n")
|
||||||
|
fmt.Printf("len %d\n",len(lines)-1)
|
||||||
|
for l := range lines {
|
||||||
|
var job_info Acc_Job_info
|
||||||
|
elements := strings.Split(lines[l], "|")
|
||||||
|
if len(elements) < 5 {
|
||||||
|
break //Well, this is not clean, but keep it like this for Now
|
||||||
|
}
|
||||||
|
id, ierr := strconv.Atoi(elements[0])
|
||||||
|
|
||||||
|
if ierr != nil {
|
||||||
|
break //we have no useable entry here but something like 323.batch . Ignore these for now
|
||||||
|
}
|
||||||
|
job_info.JobId =uint32(id)
|
||||||
|
job_info.User = elements[1]
|
||||||
|
job_info.Account = elements[2]
|
||||||
|
job_info.State = elements[3]
|
||||||
|
job_info.JobName =elements[4]
|
||||||
|
job_list = append(job_list, job_info)
|
||||||
|
}
|
||||||
|
return job_list
|
||||||
|
}
|
||||||
|
|
||||||
|
func Get_job_info_accounting(JobId uint32 ) ([]Acc_Job_info, error) {
|
||||||
|
|
||||||
|
sacct_format_string = "JobId,user,account,state,JobName"
|
||||||
|
find_slurm_path()
|
||||||
|
if slurm_path == "" {
|
||||||
|
return nil, errors.New("Cannot find slurm executable")
|
||||||
|
}
|
||||||
|
path := filepath.Join(slurm_path,"bin","sacct")
|
||||||
|
cmd:= exec.Command(path, "-j", strconv.FormatInt(int64(JobId), 10),"--format", sacct_format_string,"-p","-n")
|
||||||
|
//fmt.Printf(cmd.String())
|
||||||
|
out, err := cmd.CombinedOutput()
|
||||||
|
if err!= nil {
|
||||||
|
msg := string(out) + err.Error()
|
||||||
|
return nil, errors.New(msg)
|
||||||
|
}
|
||||||
|
list := parse_sacct_output(string(out))
|
||||||
|
|
||||||
|
|
||||||
|
return list, nil
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,278 @@
|
||||||
|
package node_info
|
||||||
|
/*
|
||||||
|
#cgo LDFLAGS: -lslurm
|
||||||
|
#include<stdlib.h>
|
||||||
|
#include<slurm/slurm.h>
|
||||||
|
#include<slurm/slurm_errno.h>
|
||||||
|
inline uint8_t uint8_ptr(uint8_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
inline int8_t int8_ptr(int8_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
uint16_t uint16_ptr(uint16_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
inline int16_t int16_ptr(int16_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
inline uint32_t uint32_ptr(uint32_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
inline int32_t int32_ptr(int32_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
inline uint64_t uint64_ptr(uint64_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
inline int64_t int64_ptr(int16_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
struct node_info_msg *get_node_info(){
|
||||||
|
struct node_info_msg* node_buffer;
|
||||||
|
if(slurm_load_node ((time_t) NULL,
|
||||||
|
&node_buffer, SHOW_ALL))
|
||||||
|
return NULL;
|
||||||
|
return node_buffer;
|
||||||
|
}
|
||||||
|
struct node_info_msg *get_single_node_info(char* name){
|
||||||
|
struct node_info_msg* node_buffer;
|
||||||
|
if( slurm_load_node_single (&node_buffer, name, SHOW_DETAIL))
|
||||||
|
return NULL;
|
||||||
|
return node_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_info* node_from_list(struct node_info_msg *list, int i){
|
||||||
|
return &list->node_array[i];
|
||||||
|
}
|
||||||
|
void free_node_buffer(void* buffer){
|
||||||
|
|
||||||
|
slurm_free_node_info_msg ((struct node_info_msg*)buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
import "unsafe"
|
||||||
|
|
||||||
|
// Node_info is a pure-Go mirror of the C struct node_info from
// <slurm/slurm.h>; see Node_info_convert_c_to_go for the field mapping.
type Node_info struct {
	Arch              string // instruction set architecture
	Boards            uint16
	Boot_time         int64 // unix time the node booted
	Cluster_name      string
	Cores             uint16
	Core_spec_cnt     uint16
	Cpu_bind          uint32
	Cpu_load          uint32
	Free_mem          uint64
	Cpus              uint16
	Cpu_spec_list     string
	Features          string
	Features_act      string
	Gres              string // generic resources (e.g. GPUs)
	Gres_drain        string
	Gres_used         string
	Mcs_label         string
	Mem_spec_limit    uint64
	Name              string
	Next_state        uint32
	Node_addr         string
	Node_hostname     string
	Node_state        uint32 // numeric state; see State_to_string
	Os                string
	Owner             uint32
	Partitions        string
	Port              uint16
	Real_memory       uint64
	Reason            string // why the node is down/drained, if it is
	Reason_time       int64
	Reason_uid        uint32
	Slurmd_start_time int64
	Sockets           uint16
	Threads           uint16
	Tmp_disk          uint32
	Weight            uint32
	Tres_fmt_str      string // trackable resources, formatted
	Version           string // slurmd version running on the node
}
|
||||||
|
// Node_info_convert_c_to_go copies one C struct node_info (from the
// slurm C API) into a pure-Go Node_info value. C strings are deep-copied
// with C.GoString and numeric fields are converted with explicit casts,
// so the returned value does not reference C memory and remains valid
// after the C buffer is freed.
func Node_info_convert_c_to_go(c_struct *C.struct_node_info) Node_info{
	var go_struct Node_info

	// Mechanical field-by-field copy, in the order the fields appear in
	// the C struct definition.
	go_struct.Arch = C.GoString(c_struct.arch)
	go_struct.Boards = uint16(c_struct.boards)
	go_struct.Boot_time = int64(c_struct.boot_time)
	go_struct.Cluster_name = C.GoString(c_struct.cluster_name)
	go_struct.Cores = uint16(c_struct.cores)
	go_struct.Core_spec_cnt = uint16(c_struct.core_spec_cnt)
	go_struct.Cpu_bind = uint32(c_struct.cpu_bind)
	go_struct.Cpu_load = uint32(c_struct.cpu_load)
	go_struct.Free_mem = uint64(c_struct.free_mem)
	go_struct.Cpus = uint16(c_struct.cpus)
	go_struct.Cpu_spec_list = C.GoString(c_struct.cpu_spec_list)
	go_struct.Features = C.GoString(c_struct.features)
	go_struct.Features_act = C.GoString(c_struct.features_act)
	go_struct.Gres = C.GoString(c_struct.gres)
	go_struct.Gres_drain = C.GoString(c_struct.gres_drain)
	go_struct.Gres_used = C.GoString(c_struct.gres_used)
	go_struct.Mcs_label = C.GoString(c_struct.mcs_label)
	go_struct.Mem_spec_limit = uint64(c_struct.mem_spec_limit)
	go_struct.Name = C.GoString(c_struct.name)
	go_struct.Next_state = uint32(c_struct.next_state)
	go_struct.Node_addr = C.GoString(c_struct.node_addr)
	go_struct.Node_hostname = C.GoString(c_struct.node_hostname)
	go_struct.Node_state = uint32(c_struct.node_state)
	go_struct.Os = C.GoString(c_struct.os)
	go_struct.Owner = uint32(c_struct.owner)
	go_struct.Partitions = C.GoString(c_struct.partitions)
	go_struct.Port = uint16(c_struct.port)
	go_struct.Real_memory = uint64(c_struct.real_memory)
	go_struct.Reason = C.GoString(c_struct.reason)
	go_struct.Reason_time = int64(c_struct.reason_time)
	go_struct.Reason_uid = uint32(c_struct.reason_uid)
	go_struct.Slurmd_start_time = int64(c_struct.slurmd_start_time)
	go_struct.Sockets = uint16(c_struct.sockets)
	go_struct.Threads = uint16(c_struct.threads)
	go_struct.Tmp_disk = uint32(c_struct.tmp_disk)
	go_struct.Weight = uint32(c_struct.weight)
	go_struct.Tres_fmt_str = C.GoString(c_struct.tres_fmt_str)
	go_struct.Version = C.GoString(c_struct.version)
	return go_struct
}
|
||||||
|
|
||||||
|
func State_to_string(state uint32) string{
|
||||||
|
|
||||||
|
switch s := C.uint16_t(state); s {
|
||||||
|
case C.NODE_STATE_UNKNOWN:
|
||||||
|
return "node state unknown"
|
||||||
|
case C.NODE_STATE_DOWN:
|
||||||
|
return "node state down"
|
||||||
|
case C.NODE_STATE_IDLE:
|
||||||
|
return "node state idle"
|
||||||
|
case C.NODE_STATE_ALLOCATED:
|
||||||
|
return "node state allocated"
|
||||||
|
case C.NODE_STATE_ERROR:
|
||||||
|
return "node state error"
|
||||||
|
case C.NODE_STATE_MIXED:
|
||||||
|
return "node state mixed"
|
||||||
|
case C.NODE_STATE_FUTURE:
|
||||||
|
return "node state future"
|
||||||
|
case C.NODE_STATE_END:
|
||||||
|
return "node state end"
|
||||||
|
}
|
||||||
|
return "Unkown state"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print_node_info dumps every field of a Node_info to stdout, one
// "label:\t value" line per field, in struct-definition order. The
// numeric Node_state is printed raw; use State_to_string for a
// human-readable form.
func Print_node_info(go_struct Node_info){
	fmt.Printf("%s:\t %s\n","arch", go_struct.Arch)
	fmt.Printf("%s:\t %d\n","boards", go_struct.Boards)
	fmt.Printf("%s:\t %d\n","boot time", go_struct.Boot_time)
	fmt.Printf("%s:\t %s\n","cluster name", go_struct.Cluster_name)
	fmt.Printf("%s:\t %d\n","cores", go_struct.Cores)
	fmt.Printf("%s:\t %d\n","core spec cnt", go_struct.Core_spec_cnt)
	fmt.Printf("%s:\t %d\n","cpu bind", go_struct.Cpu_bind)
	fmt.Printf("%s:\t %d\n","cpu load", go_struct.Cpu_load)
	fmt.Printf("%s:\t %d\n","free mem", go_struct.Free_mem)
	fmt.Printf("%s:\t %d\n","cpus", go_struct.Cpus)
	fmt.Printf("%s:\t %s\n","cpu spec list", go_struct.Cpu_spec_list)
	fmt.Printf("%s:\t %s\n","features", go_struct.Features)
	fmt.Printf("%s:\t %s\n","features act", go_struct.Features_act)
	fmt.Printf("%s:\t %s\n","gres", go_struct.Gres)
	fmt.Printf("%s:\t %s\n","gres drain", go_struct.Gres_drain)
	fmt.Printf("%s:\t %s\n","gres used", go_struct.Gres_used)
	fmt.Printf("%s:\t %s\n","mcs label", go_struct.Mcs_label)
	fmt.Printf("%s:\t %d\n","mem spec limit", go_struct.Mem_spec_limit)
	fmt.Printf("%s:\t %s\n","name", go_struct.Name)
	fmt.Printf("%s:\t %d\n","next state", go_struct.Next_state)
	fmt.Printf("%s:\t %s\n","node addr", go_struct.Node_addr)
	fmt.Printf("%s:\t %s\n","node hostname", go_struct.Node_hostname)
	fmt.Printf("%s:\t %d\n","node state", go_struct.Node_state)
	fmt.Printf("%s:\t %s\n","os", go_struct.Os)
	fmt.Printf("%s:\t %d\n","owner", go_struct.Owner)
	fmt.Printf("%s:\t %s\n","partitions", go_struct.Partitions)
	fmt.Printf("%s:\t %d\n","port", go_struct.Port)
	fmt.Printf("%s:\t %d\n","real memory", go_struct.Real_memory)
	fmt.Printf("%s:\t %s\n","reason", go_struct.Reason)
	fmt.Printf("%s:\t %d\n","reason time", go_struct.Reason_time)
	fmt.Printf("%s:\t %d\n","reason uid", go_struct.Reason_uid)
	fmt.Printf("%s:\t %d\n","slurmd start time", go_struct.Slurmd_start_time)
	fmt.Printf("%s:\t %d\n","sockets", go_struct.Sockets)
	fmt.Printf("%s:\t %d\n","threads", go_struct.Threads)
	fmt.Printf("%s:\t %d\n","tmp disk", go_struct.Tmp_disk)
	fmt.Printf("%s:\t %d\n","weight", go_struct.Weight)
	fmt.Printf("%s:\t %s\n","tres fmt str", go_struct.Tres_fmt_str)
	fmt.Printf("%s:\t %s\n","version", go_struct.Version)
}
|
||||||
|
type Node_info_msg struct {
|
||||||
|
Last_update int64;
|
||||||
|
Record_count uint32;
|
||||||
|
Error_code uint32;
|
||||||
|
Node_list []Node_info;
|
||||||
|
}
|
||||||
|
|
||||||
|
func Get_all_nodes() Node_info_msg {
|
||||||
|
var go_node_buffer Node_info_msg
|
||||||
|
c_node_buffer := C.get_node_info()
|
||||||
|
if c_node_buffer == nil {
|
||||||
|
go_node_buffer.Last_update = int64(0)
|
||||||
|
go_node_buffer.Record_count = uint32(0)
|
||||||
|
go_node_buffer.Error_code = uint32(C.slurm_get_errno())
|
||||||
|
return go_node_buffer
|
||||||
|
}
|
||||||
|
go_node_buffer.Last_update = int64(c_node_buffer.last_update)
|
||||||
|
go_node_buffer.Record_count = uint32(c_node_buffer.record_count)
|
||||||
|
go_node_buffer.Node_list =make([]Node_info,c_node_buffer.record_count, c_node_buffer.record_count)
|
||||||
|
for i:=uint32(0); i<go_node_buffer.Record_count; i++ {
|
||||||
|
node := C.node_from_list(c_node_buffer, C.int(i))
|
||||||
|
go_node := Node_info_convert_c_to_go(node)
|
||||||
|
go_node_buffer.Node_list[i]=go_node
|
||||||
|
}
|
||||||
|
C.slurm_free_node_info_msg (c_node_buffer);
|
||||||
|
|
||||||
|
return go_node_buffer
|
||||||
|
}
|
||||||
|
|
||||||
|
func Get_node_info (name string) Node_info_msg {
|
||||||
|
|
||||||
|
var go_node_buffer Node_info_msg
|
||||||
|
c_name := C.CString(name)
|
||||||
|
defer C.free(unsafe.Pointer(c_name))
|
||||||
|
c_node_buffer := C.get_single_node_info(c_name)
|
||||||
|
if c_node_buffer == nil {
|
||||||
|
go_node_buffer.Last_update = int64(0)
|
||||||
|
go_node_buffer.Record_count = uint32(0)
|
||||||
|
go_node_buffer.Error_code = uint32(C.slurm_get_errno())
|
||||||
|
|
||||||
|
return go_node_buffer;
|
||||||
|
}
|
||||||
|
go_node_buffer.Last_update = int64(c_node_buffer.last_update)
|
||||||
|
go_node_buffer.Record_count = uint32(c_node_buffer.record_count)
|
||||||
|
go_node_buffer.Node_list =make([]Node_info,c_node_buffer.record_count, c_node_buffer.record_count)
|
||||||
|
for i:=uint32(0); i<go_node_buffer.Record_count; i++ {
|
||||||
|
node := C.node_from_list(c_node_buffer, C.int(i))
|
||||||
|
go_node := Node_info_convert_c_to_go(node)
|
||||||
|
go_node_buffer.Node_list[i]=go_node
|
||||||
|
}
|
||||||
|
C.slurm_free_node_info_msg (c_node_buffer);
|
||||||
|
|
||||||
|
return go_node_buffer
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,219 @@
|
||||||
|
package partition_info
|
||||||
|
/*
|
||||||
|
#cgo LDFLAGS: -lslurm
|
||||||
|
#include<stdlib.h>
|
||||||
|
#include<slurm/slurm.h>
|
||||||
|
|
||||||
|
|
||||||
|
uint8_t uint8_ptr(uint8_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
int8_t int8_ptr(int8_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
uint16_t uint16_ptr(uint16_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
int16_t int16_ptr(int16_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
uint32_t uint32_ptr(uint32_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
int32_t int32_ptr(int32_t* pointer, int off) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
pointer+=off;
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
uint64_t uint64_ptr(uint64_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
int64_t int64_ptr(int16_t* pointer) {
|
||||||
|
if (NULL == pointer) {
|
||||||
|
return -1;}
|
||||||
|
return *pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct partition_info_msg *get_partition_info(){
|
||||||
|
struct partition_info_msg* partition_buffer;
|
||||||
|
if( slurm_load_partitions ((time_t) NULL,
|
||||||
|
&partition_buffer, SHOW_ALL))
|
||||||
|
return NULL;
|
||||||
|
return partition_buffer;
|
||||||
|
}
|
||||||
|
struct partition_info* partition_from_list(struct partition_info_msg *list, int i){
|
||||||
|
return &list->partition_array[i];
|
||||||
|
}
|
||||||
|
void free_partition_buffer(void* buffer){
|
||||||
|
|
||||||
|
slurm_free_partition_info_msg ((struct partition_info_msg*)buffer);
|
||||||
|
}
|
||||||
|
int find_node_inx(int32_t* node){
|
||||||
|
int ret = 0;
|
||||||
|
while(*node != -1) { node++; ret++;};
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// Partition_info is a pure-Go mirror of the C struct partition_info
// from <slurm/slurm.h>; see Partition_info_convert_c_to_go.
type Partition_info struct {
	Allow_alloc_nodes   string
	Allow_accounts      string
	Allow_groups        string
	Allow_qos           string
	Alternate           string
	Billing_weights_str string
	Cluster_name        string
	Cr_type             uint16
	Cpu_bind            uint32
	Def_mem_per_cpu     uint64
	Default_time        uint32
	Deny_accounts       string
	Deny_qos            string
	Flags               uint16
	Grace_time          uint32
	Job_defaults_str    string
	Max_cpus_per_node   uint32
	Max_mem_per_cpu     uint64
	Max_nodes           uint32
	Max_share           uint16
	Max_time            uint32 // maximum run time in minutes
	Min_nodes           uint32
	Name                string
	Node_inx            []int32 // node index list copied from the C array
	Nodes               string  // node name list in slurm hostlist form
	Over_time_limit     uint16
	Preempt_mode        uint16
	Priority_job_factor uint16
	Priority_tier       uint16
	Qos_char            string
	State_up            uint16
	Total_cpus          uint32
	Total_nodes         uint32
	Tres_fmt_str        string // trackable resources, formatted
}
|
||||||
|
func Partition_info_convert_c_to_go(c_struct *C.struct_partition_info) Partition_info{
|
||||||
|
var go_struct Partition_info
|
||||||
|
|
||||||
|
go_struct.Allow_alloc_nodes = C.GoString(c_struct.allow_alloc_nodes)
|
||||||
|
go_struct.Allow_accounts = C.GoString(c_struct.allow_accounts)
|
||||||
|
go_struct.Allow_groups = C.GoString(c_struct.allow_groups)
|
||||||
|
go_struct.Allow_qos = C.GoString(c_struct.allow_qos)
|
||||||
|
go_struct.Alternate = C.GoString(c_struct.alternate)
|
||||||
|
go_struct.Billing_weights_str = C.GoString(c_struct.billing_weights_str)
|
||||||
|
go_struct.Cluster_name = C.GoString(c_struct.cluster_name)
|
||||||
|
go_struct.Cr_type = uint16(c_struct.cr_type)
|
||||||
|
go_struct.Cpu_bind = uint32(c_struct.cpu_bind)
|
||||||
|
go_struct.Def_mem_per_cpu = uint64(c_struct.def_mem_per_cpu)
|
||||||
|
go_struct.Default_time = uint32(c_struct.default_time)
|
||||||
|
go_struct.Deny_accounts = C.GoString(c_struct.deny_accounts)
|
||||||
|
go_struct.Deny_qos = C.GoString(c_struct.deny_qos)
|
||||||
|
go_struct.Flags = uint16(c_struct.flags)
|
||||||
|
go_struct.Grace_time = uint32(c_struct.grace_time)
|
||||||
|
go_struct.Job_defaults_str = C.GoString(c_struct.job_defaults_str)
|
||||||
|
go_struct.Max_cpus_per_node = uint32(c_struct.max_cpus_per_node)
|
||||||
|
go_struct.Max_mem_per_cpu = uint64(c_struct.max_mem_per_cpu)
|
||||||
|
go_struct.Max_nodes = uint32(c_struct.max_nodes)
|
||||||
|
go_struct.Max_share = uint16(c_struct.max_share)
|
||||||
|
go_struct.Max_time = uint32(c_struct.max_time)
|
||||||
|
go_struct.Min_nodes = uint32(c_struct.min_nodes)
|
||||||
|
go_struct.Name = C.GoString(c_struct.name)
|
||||||
|
t := C.find_node_inx(c_struct.node_inx)
|
||||||
|
|
||||||
|
fmt.Printf("%d", t)
|
||||||
|
go_struct.Node_inx = make([]int32, t,t)
|
||||||
|
for i:=int32(0); i<int32(t) ; i++{
|
||||||
|
go_struct.Node_inx[i] = int32(C.int32_ptr(c_struct.node_inx,C.int(i)))
|
||||||
|
|
||||||
|
}
|
||||||
|
go_struct.Nodes = C.GoString(c_struct.nodes)
|
||||||
|
go_struct.Over_time_limit = uint16(c_struct.over_time_limit)
|
||||||
|
go_struct.Preempt_mode = uint16(c_struct.preempt_mode)
|
||||||
|
go_struct.Priority_job_factor = uint16(c_struct.priority_job_factor)
|
||||||
|
go_struct.Priority_tier = uint16(c_struct.priority_tier)
|
||||||
|
go_struct.Qos_char = C.GoString(c_struct.qos_char)
|
||||||
|
go_struct.State_up = uint16(c_struct.state_up)
|
||||||
|
go_struct.Total_cpus = uint32(c_struct.total_cpus)
|
||||||
|
go_struct.Total_nodes = uint32(c_struct.total_nodes)
|
||||||
|
go_struct.Tres_fmt_str = C.GoString(c_struct.tres_fmt_str)
|
||||||
|
return go_struct
|
||||||
|
}
|
||||||
|
// Print_Partition_info dumps every field of a Partition_info to stdout,
// one "label:\t value" line per field, in struct-definition order.
// Node_inx is printed with %d, i.e. as a bracketed slice of indices.
func Print_Partition_info(go_struct Partition_info){
	fmt.Printf("%s:\t %s\n","allow alloc nodes", go_struct.Allow_alloc_nodes)
	fmt.Printf("%s:\t %s\n","allow accounts", go_struct.Allow_accounts)
	fmt.Printf("%s:\t %s\n","allow groups", go_struct.Allow_groups)
	fmt.Printf("%s:\t %s\n","allow qos", go_struct.Allow_qos)
	fmt.Printf("%s:\t %s\n","alternate", go_struct.Alternate)
	fmt.Printf("%s:\t %s\n","billing weights str", go_struct.Billing_weights_str)
	fmt.Printf("%s:\t %s\n","cluster name", go_struct.Cluster_name)
	fmt.Printf("%s:\t %d\n","cr type", go_struct.Cr_type)
	fmt.Printf("%s:\t %d\n","cpu bind", go_struct.Cpu_bind)
	fmt.Printf("%s:\t %d\n","def mem per cpu", go_struct.Def_mem_per_cpu)
	fmt.Printf("%s:\t %d\n","default time", go_struct.Default_time)
	fmt.Printf("%s:\t %s\n","deny accounts", go_struct.Deny_accounts)
	fmt.Printf("%s:\t %s\n","deny qos", go_struct.Deny_qos)
	fmt.Printf("%s:\t %d\n","flags", go_struct.Flags)
	fmt.Printf("%s:\t %d\n","grace time", go_struct.Grace_time)
	fmt.Printf("%s:\t %s\n","job defaults str", go_struct.Job_defaults_str)
	fmt.Printf("%s:\t %d\n","max cpus per node", go_struct.Max_cpus_per_node)
	fmt.Printf("%s:\t %d\n","max mem per cpu", go_struct.Max_mem_per_cpu)
	fmt.Printf("%s:\t %d\n","max nodes", go_struct.Max_nodes)
	fmt.Printf("%s:\t %d\n","max share", go_struct.Max_share)
	fmt.Printf("%s:\t %d\n","max time", go_struct.Max_time)
	fmt.Printf("%s:\t %d\n","min nodes", go_struct.Min_nodes)
	fmt.Printf("%s:\t %s\n","name", go_struct.Name)
	fmt.Printf("%s:\t %d\n","node inx", go_struct.Node_inx)
	fmt.Printf("%s:\t %s\n","nodes", go_struct.Nodes)
	fmt.Printf("%s:\t %d\n","over time limit", go_struct.Over_time_limit)
	fmt.Printf("%s:\t %d\n","preempt mode", go_struct.Preempt_mode)
	fmt.Printf("%s:\t %d\n","priority job factor", go_struct.Priority_job_factor)
	fmt.Printf("%s:\t %d\n","priority tier", go_struct.Priority_tier)
	fmt.Printf("%s:\t %s\n","qos char", go_struct.Qos_char)
	fmt.Printf("%s:\t %d\n","state up", go_struct.State_up)
	fmt.Printf("%s:\t %d\n","total cpus", go_struct.Total_cpus)
	fmt.Printf("%s:\t %d\n","total nodes", go_struct.Total_nodes)
	fmt.Printf("%s:\t %s\n","tres fmt str", go_struct.Tres_fmt_str)
}
|
||||||
|
|
||||||
|
type Partition_info_msg struct {
|
||||||
|
Last_update int64;
|
||||||
|
Record_count uint32;
|
||||||
|
Partition_list []Partition_info;
|
||||||
|
}
|
||||||
|
// Get_partitions loads information about all partitions known to the
// slurm controller and converts them to Go values.
//
// On failure an all-zero Partition_info_msg is returned; because the
// type has no Error_code field, a failed load is indistinguishable
// from an empty cluster (Record_count == 0).
func Get_partitions() Partition_info_msg {
	var go_partition_buffer Partition_info_msg
	c_partition_buffer := C.get_partition_info()
	if c_partition_buffer == nil{
		go_partition_buffer.Last_update = int64(0)
		go_partition_buffer.Record_count = uint32(0)
		return go_partition_buffer;
	}
	go_partition_buffer.Last_update = int64(c_partition_buffer.last_update)
	go_partition_buffer.Record_count = uint32(c_partition_buffer.record_count)
	// Convert every C record to its Go counterpart so the C buffer can
	// be freed before returning.
	go_partition_buffer.Partition_list =make([]Partition_info,c_partition_buffer.record_count, c_partition_buffer.record_count)
	for i:=uint32(0); i<go_partition_buffer.Record_count; i++ {
		partition := C.partition_from_list(c_partition_buffer, C.int(i))
		go_partition := Partition_info_convert_c_to_go(partition)
		go_partition_buffer.Partition_list[i]=go_partition
	}
	C.slurm_free_partition_info_msg (c_partition_buffer);
	return go_partition_buffer
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,20 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "slurm/extra"
|
||||||
|
import "fmt"
|
||||||
|
import "os"
|
||||||
|
import "strconv"
|
||||||
|
func main(){
|
||||||
|
|
||||||
|
if len(os.Args)<2 {
|
||||||
|
fmt.Printf("Please specify Job ID\n")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
id,_ := strconv.Atoi(os.Args[1])
|
||||||
|
fmt.Printf("try to cancel %d\n", id)
|
||||||
|
err:= extra.Cancel_job(uint32( id))
|
||||||
|
if(err!= nil){
|
||||||
|
fmt.Printf(err.Error())
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "slurm/extra"
|
||||||
|
import "fmt"
|
||||||
|
import "os"
|
||||||
|
import "strconv"
|
||||||
|
func main(){
|
||||||
|
|
||||||
|
if len(os.Args)<2 {
|
||||||
|
fmt.Printf("Please specify Job ID\n")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
id,_ := strconv.Atoi(os.Args[1])
|
||||||
|
|
||||||
|
jobs, err := extra. Get_job_info_accounting(uint32( id))
|
||||||
|
if err!= nil {
|
||||||
|
fmt.Printf(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Printf("JobId\tuser\taccount\tstate\t\tJobName\n")
|
||||||
|
for i := range(jobs) {
|
||||||
|
fmt.Printf("%d\t%s\t%s\t%s\t%s\n", jobs[i].JobId, jobs[i].User, jobs[i].Account, jobs[i].State, jobs[i].JobName)
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "slurm/jobinfo"
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
func main(){
|
||||||
|
job_list := job_info.Get_all_jobs()
|
||||||
|
fmt.Printf("Found %d jobs \n", job_list.Record_count)
|
||||||
|
/* a little bit nicer */
|
||||||
|
fmt.Printf("Id\tName\t\tPartion\tUser\tRuntime\tStatus\t\t(Reason)\tNodes\tPriority\n")
|
||||||
|
fmt.Printf("________________________________________________________________________________________________\n")
|
||||||
|
for i := range job_list.Job_list {
|
||||||
|
job := job_list.Job_list[i]
|
||||||
|
fmt.Printf("%d\t%s\t%s\t%s %s\t%s\t%s\t%s\t%d\n" ,
|
||||||
|
job.Job_id, job.Name, job.Partition, job.User_name,job_info.Get_job_runtime(job).String(), job.Job_stateS ,
|
||||||
|
job_info.Reason_to_string(job.State_reason), job.Nodes,job.Priority)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "slurm/nodeinfo"
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
func main(){
|
||||||
|
node_list := node_info.Get_all_nodes()
|
||||||
|
fmt.Printf("Found %d nodes \n", node_list.Record_count)
|
||||||
|
|
||||||
|
|
||||||
|
/* a little bit nicer*/
|
||||||
|
fmt.Printf("name\t State\t\t\t Reason\t\t Tres\n")
|
||||||
|
fmt.Printf("________________________________________\n")
|
||||||
|
for i := range node_list.Node_list {
|
||||||
|
node := node_list.Node_list[i]
|
||||||
|
fmt.Printf("%s\t %s\t %s\t %s\n", node.Node_hostname, node_info.State_to_string(node.Node_state), node.Reason, node.Tres_fmt_str)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,15 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "slurm"
|
||||||
|
import "fmt"
|
||||||
|
func main(){
|
||||||
|
|
||||||
|
version := int(0)
|
||||||
|
var config slurm.Ctl_conf
|
||||||
|
version = slurm.Version()
|
||||||
|
fmt.Printf("Version is %s\n", slurm.VersionString(version));
|
||||||
|
config = slurm.GetConfig()
|
||||||
|
slurm.Print_Ctl_conf(config)
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "slurm/jobinfo"
|
||||||
|
import "slurm"
|
||||||
|
import "fmt"
|
||||||
|
import "os"
|
||||||
|
import "strconv"
|
||||||
|
|
||||||
|
func main(){
|
||||||
|
|
||||||
|
if len(os.Args)<2 {
|
||||||
|
fmt.Printf("Please specify Job ID\n")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
id,_ := strconv.Atoi(os.Args[1])
|
||||||
|
job_list := job_info.Get_job(uint32(id))
|
||||||
|
if job_list.Error_code != 0 {
|
||||||
|
msg := slurm.GetErrorString(job_list.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
for i := range job_list.Job_list {
|
||||||
|
job_info.Print_Job_info(job_list.Job_list[i])
|
||||||
|
}
|
||||||
|
fmt.Printf("Id\tName\t\tPartion\tUser\tRuntime\tStatus\t\t(Reason)\tNodes\tPriority\n")
|
||||||
|
fmt.Printf("________________________________________________________________________________________________\n")
|
||||||
|
for i := range job_list.Job_list {
|
||||||
|
job := job_list.Job_list[i]
|
||||||
|
fmt.Printf("%d\t%s\t%s\t%s %s\t%s\t%s\t%s\t%d\n" ,
|
||||||
|
job.Job_id, job.Name, job.Partition, job.User_name,job_info.Get_job_runtime(job).String(), job.Job_stateS,
|
||||||
|
job_info.Reason_to_string(job.State_reason), job.Nodes,job.Priority)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
end_time :=job_info.Get_job_endtime(uint32(id))
|
||||||
|
fmt.Printf("End-Time: %s\n", end_time)
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "slurm/nodeinfo"
|
||||||
|
import "slurm"
|
||||||
|
import "fmt"
|
||||||
|
import "os"
|
||||||
|
func main(){
|
||||||
|
if len(os.Args)<2 {
|
||||||
|
fmt.Printf("Please specify node name\n")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
name:= os.Args[1]
|
||||||
|
|
||||||
|
node_list := node_info.Get_node_info(name)
|
||||||
|
if(node_list.Error_code !=0) {
|
||||||
|
msg := slurm.GetErrorString(node_list.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Printf("Found %d nodes \n", node_list.Record_count)
|
||||||
|
|
||||||
|
|
||||||
|
/* a little bit nicer*/
|
||||||
|
fmt.Printf("name\t State\t\t\t Reason\t\t Tres\n")
|
||||||
|
fmt.Printf("________________________________________\n")
|
||||||
|
for i := range node_list.Node_list {
|
||||||
|
node := node_list.Node_list[i]
|
||||||
|
fmt.Printf("%s\t %s\t %s\t %s\n", node.Node_hostname, node_info.State_to_string(node.Node_state), node.Reason, node.Tres_fmt_str)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
// Command partition_info lists all Slurm partitions in a simple table.
package main

import "slurm/partitioninfo"
import "fmt"

func main() {
	partition_list := partition_info.Get_partitions()
	// Fixed typo: "partions" -> "partitions".
	fmt.Printf("Found %d partitions \n", partition_list.Record_count)

	/* a little bit nicer */
	// Fixed header: the columns actually printed are Name, Nodes, Max_time
	// and Node_inx; the old header advertised "Tres", which is never printed.
	fmt.Printf("Name\t Nodes\t\t\t Max_time(min)\t\t Node_inx\n")
	fmt.Printf("________________________________________\n")
	for i := range partition_list.Partition_list {
		partition := partition_list.Partition_list[i]
		fmt.Printf("%s\t %s\t %d\t %d\n", partition.Name, partition.Nodes, partition.Max_time, partition.Node_inx)
	}
}
|
|
@ -0,0 +1,35 @@
|
||||||
|
// Command user_jobs lists all jobs belonging to the user named on the
// command line in a simple table.
package main

import "slurm/jobinfo"
import "slurm"
import "fmt"
import "os"

func main() {
	if len(os.Args) < 2 {
		fmt.Printf("Please specify username\n")
		return
	}
	name := os.Args[1]

	job_list := job_info.Get_user_jobs(name)
	if job_list.Error_code != 0 {
		msg := slurm.GetErrorString(job_list.Error_code)
		fmt.Printf("Error: %s\n", msg)
		return
	}

	// Fixed typo in the user-visible column header: "Partion" -> "Partition".
	fmt.Printf("Id\tName\t\tPartition\tUser\tRuntime\tStatus\t\t(Reason)\tNodes\tPriority\n")
	fmt.Printf("________________________________________________________________________________________________\n")
	for i := range job_list.Job_list {
		job := job_list.Job_list[i]
		fmt.Printf("%d\t%s\t%s\t%s %s\t%s\t%s\t%s\t%d\n",
			job.Job_id, job.Name, job.Partition, job.User_name, job_info.Get_job_runtime(job).String(), job.Job_stateS,
			job_info.Reason_to_string(job.State_reason), job.Nodes, job.Priority)
	}
}
|
|
@ -0,0 +1,109 @@
|
||||||
|
# Submission of jobs
|
||||||
|
This folder shows, in a few more examples, how jobs can be submitted in Slurm. Some examples use containers.
|
||||||
|
Attention: The parameters for job names and partitions probably have to be adjusted!
|
||||||
|
|
||||||
|
# Simple Jobs
|
||||||
|
|
||||||
|
## submit_job.go
|
||||||
|
In this example, a simple bash job is submitted. The used partition is *long* (adapt as needed).
|
||||||
|
```
|
||||||
|
job_desc.Partition="long"
|
||||||
|
```
|
||||||
|
|
||||||
|
The job sets two environment variables and executes a
|
||||||
|
```
|
||||||
|
hostname
|
||||||
|
env | grep SLURM
|
||||||
|
```
|
||||||
|
On a single node of the cluster (single task job).
|
||||||
|
The application does not wait until the job is completed, but directly returns.
|
||||||
|
The (std) output is written to
|
||||||
|
out-jobid.txt, the std- error to err-jobid.txt
|
||||||
|
|
||||||
|
```
|
||||||
|
job_desc.Std_out = ("./out-%j.txt")
|
||||||
|
job_desc.Std_err = ("./err-%j.txt")
|
||||||
|
````
|
||||||
|
|
||||||
|
|
||||||
|
## update_job.go
|
||||||
|
This example allows to update the qos and the partition a job is running on. This can help to move the job to another queue with another partition.
|
||||||
|
Note to users: In theory, the API allows the update of the number of nodes and the tasks per node. However, since this is only allowed by root or a slurm admin, we do not include an example here.
|
||||||
|
Syntax
|
||||||
|
```
|
||||||
|
./update_job JobId qos partition
|
||||||
|
```
|
||||||
|
(Note: This requires that the Job with the Id JobID is already submitted and in a pending state)
|
||||||
|
|
||||||
|
|
||||||
|
# Container jobs
|
||||||
|
|
||||||
|
The following examples all submit a job that starts singularity containers.
|
||||||
|
These containers, if they do not exist, are created. However, problems can arise if the user does not have sudo permissions..
|
||||||
|
|
||||||
|
## The containers
|
||||||
|
|
||||||
|
The first container is an MPI container. This is used by and `submit_mpi_containier.go` and `submit_mpi_and_update.go`. The definition is stored in `mpi_container.def`
|
||||||
|
It can also be created with the command
|
||||||
|
|
||||||
|
```
|
||||||
|
sudo singularity build mpi_container.img mpi_container.def
|
||||||
|
```
|
||||||
|
|
||||||
|
The program mpi_pingppong (source code enclosed: `mpi_pingpong.c` ) is built into the container. It performs a ping-pong test between two processes.
|
||||||
|
|
||||||
|
This container uses the hybrid model, which assumes that MPI is installed on the cluster (to start the job) and installs it in the container itself. Works with OpenMPI.
|
||||||
|
|
||||||
|
The second container is an openmp container, including a sample OpenMP programm openmp_example (source code: ` openmp_example.c`).
|
||||||
|
It can also be created with the command:
|
||||||
|
|
||||||
|
```
|
||||||
|
sudo singularity build openmp_container.img openmp_container.def
|
||||||
|
```
|
||||||
|
|
||||||
|
This container is used by `submit_openmp_container.go`.
|
||||||
|
|
||||||
|
|
||||||
|
## submit_mpi_containier.go
|
||||||
|
Submits an MPI-container job to the cluster. It runs two processes on two nodes.
|
||||||
|
```
|
||||||
|
job_desc.Min_nodes =uint32(2)
|
||||||
|
job_desc.Num_tasks = uint32(2)
|
||||||
|
```
|
||||||
|
The application blocks, until the job is completed. The (std) output is written to
|
||||||
|
jobid-out.txt, the std- error to jobId-err.txt
|
||||||
|
```
|
||||||
|
job_desc.Std_out = ("./%j-out.txt")
|
||||||
|
job_desc.Std_err = ("./%j-err.txt")
|
||||||
|
```
|
||||||
|
## submit_omp_container.go
|
||||||
|
Submits two OpenMP jobs to the cluster and waits until they are completed.
|
||||||
|
Both jobs allocate *one process* for the job, but *two CPUs per task/process* (for multi-threading).
|
||||||
|
```
|
||||||
|
job_desc.Num_tasks = uint32(1)
|
||||||
|
job_desc.Cpus_per_task = uint16(2)
|
||||||
|
|
||||||
|
```
|
||||||
|
The first job reads the environment variable ` SLURM_JOB_CPUS_PER_NODE` and sets the number of openMP threads to exactly the number of cpus that are available per task/process.
|
||||||
|
```
|
||||||
|
job_desc.Script+= "export OMP_NUM_THREADS=$SLURM_JOB_CPUS_PER_NODE\n"
|
||||||
|
```
|
||||||
|
The second job sets the number of threads to 4 (which is oversubscribing, because more threads are started than CPUs are allocated) and executes the same job.
|
||||||
|
```
|
||||||
|
job_desc.Script+= "export OMP_NUM_THREADS=4\n"
|
||||||
|
```
|
||||||
|
|
||||||
|
The program waits until both jobs are completed. The results are written to the two output files, similar to `submit_mpi_container.go`
|
||||||
|
|
||||||
|
### submit_mpi_and_update.go
|
||||||
|
This application does the same as `submit_mpi_container.go`
|
||||||
|
```
|
||||||
|
ops.Qos = "shortjobs"
|
||||||
|
ops.Partition = "short"
|
||||||
|
```
|
||||||
|
|
||||||
|
This situation can, for example, be created by submitting other, longer jobs in the background beforehand (depending on the partition size) and then starting this application:
|
||||||
|
|
||||||
|
```
|
||||||
|
./submit_mpi_containier & ./submit_mpi_containier & ./submit_mpi_and_update
|
||||||
|
```
|
|
@ -0,0 +1,37 @@
|
||||||
|
Bootstrap: docker
From: ubuntu:latest

%files
    mpi_pingpong.c /opt

%environment
    export OMPI_DIR=/home0/opt/openmpi
    export SINGULARITY_OMPI_DIR=$OMPI_DIR
    export SINGULARITYENV_APPEND_PATH=$OMPI_DIR/bin
    # Fixed typo: was SINGULAIRTYENV_..., which Singularity silently ignores,
    # so the container's LD_LIBRARY_PATH was never extended.
    export SINGULARITYENV_APPEND_LD_LIBRARY_PATH=$OMPI_DIR/lib

%post
    echo "Installing required packages..."
    apt-get update && apt-get install -y wget git bash gcc gfortran g++ make file

    echo "Installing Open MPI"
    export OMPI_DIR=/home0/opt/openmpi
    export OMPI_VERSION=4.0.3
    export OMPI_URL="https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-$OMPI_VERSION.tar.bz2"
    mkdir -p /tmp/ompi
    mkdir -p /opt
    chmod a+w /opt/
    chmod a+r /opt/
    ls -la /tmp/ompi
    # Download
    cd /tmp/ompi && wget -O openmpi-$OMPI_VERSION.tar.bz2 $OMPI_URL && tar -xjf openmpi-$OMPI_VERSION.tar.bz2
    ls -la
    # Compile and install
    cd /tmp/ompi/openmpi-$OMPI_VERSION && ./configure --prefix=$OMPI_DIR && make install
    # Set env variables so we can compile our application
    export PATH=$OMPI_DIR/bin:$PATH
    export LD_LIBRARY_PATH=$OMPI_DIR/lib:$LD_LIBRARY_PATH
    export MANPATH=$OMPI_DIR/share/man:$MANPATH
    # rm -r tmp/mpi
    echo "Compiling the MPI application..."
    cd /opt && mpicc -o mpi_pingpong mpi_pingpong.c
|
|
@ -0,0 +1,65 @@
|
||||||
|
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define MAX_ITER 1000

/*
 * Two-process MPI ping-pong benchmark: for message sizes from 1 byte up to
 * 4 MiB (doubling each step), rank 0 sends to rank 1 and receives the echo
 * back MAX_ITER times; the total elapsed time of the measured iterations is
 * printed per message size.
 *
 * NOTE(review): timing uses clock(), i.e. process CPU time, not wall-clock
 * time; MPI_Wtime() would be the usual choice for communication benchmarks.
 */
int main (int argc, char **argv) {
    int rc;
    int size;
    int myrank;
    size_t max_send = 1<<22;   /* 4 MiB maximum message size */
    char *send_buf = (char*)malloc(sizeof(char)*max_send);
    char *recv_buf = (char*)malloc(sizeof(char)*max_send);
    size_t send_size;
    clock_t start, end;

    /* Bug fix: malloc results were never checked. */
    if (send_buf == NULL || recv_buf == NULL) {
        fprintf (stderr, "allocation of message buffers failed\n");
        free (send_buf);
        free (recv_buf);
        return EXIT_FAILURE;
    }

    rc = MPI_Init (&argc, &argv);
    if (rc != MPI_SUCCESS) {
        fprintf (stderr, "MPI_Init() failed");
        free (send_buf);
        free (recv_buf);
        return EXIT_FAILURE;
    }

    rc = MPI_Comm_size (MPI_COMM_WORLD, &size);
    if (rc != MPI_SUCCESS) {
        fprintf (stderr, "MPI_Comm_size() failed");
        goto exit_with_error;
    }

    /* Bug fix: previously only a warning was printed and execution went on;
     * with any process count other than 2, ranks >= 2 would block forever in
     * MPI_Recv (rank 0 only ever exchanges with rank 1). */
    if (size != 2) {
        fprintf (stderr, "This program requires exactly two processes\n");
        goto exit_with_error;
    }

    rc = MPI_Comm_rank (MPI_COMM_WORLD, &myrank);
    if (rc != MPI_SUCCESS) {
        fprintf (stderr, "MPI_Comm_rank() failed");
        goto exit_with_error;
    }

    if (myrank == 0)
        fprintf (stdout, "Size\t Time(ms)\n");

    for (send_size = 1; send_size <= max_send; send_size *= 2) {
        /* Two warm-up iterations, then MAX_ITER timed round trips. */
        for (int i = 0; i < MAX_ITER+2; i++) {
            if (i == 2)
                start = clock();
            if (myrank == 0) {
                MPI_Send(send_buf, send_size, MPI_CHAR, 1, 0x4, MPI_COMM_WORLD);
                MPI_Recv(recv_buf, send_size, MPI_CHAR, 1, 0x5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            } else {
                MPI_Recv(recv_buf, send_size, MPI_CHAR, 0, 0x4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                MPI_Send(send_buf, send_size, MPI_CHAR, 0, 0x5, MPI_COMM_WORLD);
            }
        }
        end = clock();
        /* Bug fix: the header advertises milliseconds, but the old code
         * printed seconds; scale by 1000 to match the header. */
        double time_taken = (double)(end-start) * 1000.0 / CLOCKS_PER_SEC;
        if (myrank == 0)
            fprintf(stdout, "%zu\t %f\n", send_size, time_taken); /* %zu for size_t */
    }

    MPI_Finalize();
    free (send_buf);
    free (recv_buf);
    return EXIT_SUCCESS;

exit_with_error:
    MPI_Finalize();
    free (send_buf);
    free (recv_buf);
    return EXIT_FAILURE;
}
|
|
@ -0,0 +1,18 @@
|
||||||
|
Bootstrap: docker
From: ubuntu:latest

%files
    openmp_example.c /opt

%environment
    export OMPI_DIR=/home0/opt/openmpi
    export SINGULARITY_OMPI_DIR=$OMPI_DIR
    export SINGULARITYENV_APPEND_PATH=$OMPI_DIR/bin
    # Fixed typo: was SINGULAIRTYENV_..., which Singularity silently ignores.
    export SINGULARITYENV_APPEND_LD_LIBRARY_PATH=$OMPI_DIR/lib

%post
    echo "Installing required packages..."
    apt-get update && apt-get install -y wget git bash gcc gfortran g++ make file

    # Fixed message: this container builds the OpenMP example, not an MPI one.
    echo "Compiling the OpenMP application..."
    cd /opt && gcc -o openmp_example -fopenmp openmp_example.c
|
|
@ -0,0 +1,14 @@
|
||||||
|
#include <stdio.h>
#include <omp.h>

/* Minimal OpenMP demonstration: every thread of a parallel region reports
 * its id, the team size, and a thread-private data value. */
int main() {
#pragma omp parallel
	{
		int tid = omp_get_thread_num();
		int payload = tid;               /* per-thread private copy */
		int nthreads = omp_get_num_threads();
		printf("Greetings from thread %d out of %d with Data %d\n", tid, nthreads, payload);
	}
	printf("parallel for ends.\n");
	return 0;
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
// Command submit_job submits a minimal single-node bash job and prints the
// id assigned by Slurm. It returns immediately without waiting for the job.
package main

import "slurm/submitjob"
import "slurm"
import "os/user"
import "os"
import "strconv"
import "fmt"

func main() {
	desc := submit_job.Job_descriptor{}
	desc.Script = "#! /bin/bash\n hostname \n env | grep SLURM "

	cwd, _ := os.Getwd()

	// Run the job as the current user and group.
	cur, _ := user.Current()
	uid, _ := strconv.Atoi(cur.Uid)
	desc.User_id = uint32(uid)
	gid, _ := strconv.Atoi(cur.Gid)
	desc.Group_id = uint32(gid)

	desc.Name = "test_job"
	desc.Partition = "long"
	desc.Time_limit = uint32(2)
	desc.Min_nodes = uint32(1)
	// %j is expanded by Slurm to the job id.
	desc.Std_out = "./out-%j.txt"
	desc.Std_err = "./err-%j.txt"
	desc.Work_dir = cwd
	desc.Environment = []string{"SLURM_GO_JOB=TRUE", "SLURM_CONTAINER_JOB=FALSE"}

	answer := submit_job.Submit_job(&desc)
	if answer.Error_code != 0 {
		fmt.Printf("Error: %s\n", slurm.GetErrorString(answer.Error_code))
		return
	}
	fmt.Printf("Submitted Job %d\n", answer.Job_id)
}
|
|
@ -0,0 +1,127 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
|
||||||
|
import "slurm/submitjob"
|
||||||
|
import "slurm"
|
||||||
|
import "os"
|
||||||
|
import "strconv"
|
||||||
|
import "fmt"
|
||||||
|
import "os/exec"
|
||||||
|
import "path/filepath"
|
||||||
|
import "slurm/jobinfo"
|
||||||
|
import "time"
|
||||||
|
import "os/user"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
func fileExists(filename string) bool {
|
||||||
|
info, err := os.Stat(filename)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return !info.IsDir()
|
||||||
|
}
|
||||||
|
// build_container builds a singularity image from a definition file by
// shelling out to "sudo singularity build". A build failure is reported but
// deliberately not fatal: the caller re-checks whether the image exists.
func build_container(file_name, container_name string) {
	builder := exec.Command("sudo", "/usr/local/bin/singularity", "build", container_name, file_name)
	fmt.Print("Now build new container")
	fmt.Printf("%s\n", builder.String())
	output, buildErr := builder.CombinedOutput()
	if buildErr != nil {
		fmt.Printf("error in creating container %s \n", buildErr)
		// return
	}
	fmt.Printf("%s\n", output)
}
|
||||||
|
|
||||||
|
func main(){
|
||||||
|
job_desc := submit_job.Job_descriptor{}
|
||||||
|
|
||||||
|
dir, _ := os.Getwd()
|
||||||
|
container := filepath.Join(dir, "mpi_container.img")
|
||||||
|
definition := filepath.Join(dir, "mpi_container.def")
|
||||||
|
if !fileExists(container){
|
||||||
|
build_container(definition,container)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !fileExists(container){
|
||||||
|
return
|
||||||
|
}
|
||||||
|
/* use Cmd to create our script */
|
||||||
|
|
||||||
|
job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n srun hostname \n"
|
||||||
|
cmd := exec.Command( "/home0/opt/openmpi/bin/mpirun", "-mca btl_tcp_if_include eth1", "/usr/local/bin/singularity", "exec",container, "/opt/mpi_pingpong" )
|
||||||
|
job_desc.Script+= cmd.String()
|
||||||
|
fmt.Printf("cmd %s\n", job_desc.Script)
|
||||||
|
user, _:= user.Current()
|
||||||
|
userid , _ := strconv.Atoi(user.Uid)
|
||||||
|
job_desc.User_id= uint32(userid)
|
||||||
|
groupid , _ := strconv.Atoi(user.Gid)
|
||||||
|
|
||||||
|
job_desc.Group_id= uint32(groupid)
|
||||||
|
job_desc.Name = "flex_mpi_job"
|
||||||
|
job_desc.Partition="long"
|
||||||
|
job_desc.Time_limit = uint32(60)
|
||||||
|
job_desc.Ntasks_per_node = uint16(1)
|
||||||
|
job_desc.Num_tasks = uint32(2)
|
||||||
|
job_desc.Std_out = ("./%j-out.txt")
|
||||||
|
job_desc.Std_err = ("./%j-err.txt")
|
||||||
|
job_desc.Work_dir = dir
|
||||||
|
|
||||||
|
time.Sleep(3 * time.Second)
|
||||||
|
answer := submit_job.Submit_job(&job_desc)
|
||||||
|
if(answer.Error_code != 0) {
|
||||||
|
msg := slurm.GetErrorString(answer.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Printf("Submitted Job %d\n", answer.Job_id)
|
||||||
|
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
|
||||||
|
job_list := job_info.Get_job(answer.Job_id)
|
||||||
|
if job_list.Error_code != 0 {
|
||||||
|
msg := slurm.GetErrorString(job_list.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
job := job_list.Job_list[0]
|
||||||
|
|
||||||
|
fmt.Printf("job %d is %s\n", answer.Job_id, job.Job_stateS)
|
||||||
|
state := job.Job_stateS
|
||||||
|
if state == "Pending" {
|
||||||
|
fmt.Printf("Move job %d to another partition \n", answer.Job_id)
|
||||||
|
var ops submit_job.Update_job_options
|
||||||
|
|
||||||
|
ops.Qos = "shortjobs"
|
||||||
|
ops.Partition = "short"
|
||||||
|
err2 := submit_job.Update_job(ops, uint32(answer.Job_id))
|
||||||
|
if err2!= uint32(0) {
|
||||||
|
fmt.Printf("error %s \n", slurm.GetErrorString(err2))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for state == "Pending" || state == "Running" {
|
||||||
|
time.Sleep(2 * time.Second)
|
||||||
|
job_list = job_info.Get_job(answer.Job_id)
|
||||||
|
if job_list.Error_code != 0 {
|
||||||
|
msg := slurm.GetErrorString(job_list.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
job = job_list.Job_list[0]
|
||||||
|
|
||||||
|
state = job.Job_stateS
|
||||||
|
|
||||||
|
fmt.Printf("job %d is %s\n",answer.Job_id, job.Job_stateS)
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Total runtime Job %d %s\n",job.Job_id, job_info.Get_job_runtime(job).String() )
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,111 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "slurm/submitjob"
|
||||||
|
import "slurm"
|
||||||
|
import "os/user"
|
||||||
|
import "os"
|
||||||
|
import "strconv"
|
||||||
|
import "fmt"
|
||||||
|
import "os/exec"
|
||||||
|
import "path/filepath"
|
||||||
|
import "slurm/jobinfo"
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
|
||||||
|
func fileExists(filename string) bool {
|
||||||
|
info, err := os.Stat(filename)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return !info.IsDir()
|
||||||
|
}
|
||||||
|
// build_container builds a singularity image from a definition file via
// "sudo singularity build". A build failure is reported (with the captured
// build output) but deliberately not fatal: the caller re-checks the image.
func build_container(file_name, container_name string) {
	builder := exec.Command("sudo", "/usr/local/bin/singularity", "build", container_name, file_name)
	fmt.Print("Now build new container")
	fmt.Printf("%s\n", builder.String())
	output, buildErr := builder.CombinedOutput()
	if buildErr != nil {
		fmt.Printf("error in creating container %s \n", buildErr)
		fmt.Printf("%s\n", output)
		// return
	}
	fmt.Printf("%s\n", output)
}
|
||||||
|
|
||||||
|
func main(){
|
||||||
|
job_desc := submit_job.Job_descriptor{}
|
||||||
|
|
||||||
|
dir, _ := os.Getwd()
|
||||||
|
container := filepath.Join(dir, "mpi_container.img")
|
||||||
|
definition := filepath.Join(dir, "mpi_container.def")
|
||||||
|
if !fileExists(container){
|
||||||
|
build_container(definition,container)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !fileExists(container){
|
||||||
|
return
|
||||||
|
}
|
||||||
|
/* use Cmd to create our script */
|
||||||
|
|
||||||
|
job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n hostname \n"
|
||||||
|
cmd := exec.Command( "/home0/opt/openmpi/bin/mpirun", "-mca btl_tcp_if_include eth1", "/usr/local/bin/singularity", "exec",container, "/opt/mpi_pingpong" )
|
||||||
|
job_desc.Script+= cmd.String()
|
||||||
|
fmt.Printf("cmd %s\n", job_desc.Script)
|
||||||
|
user, _:= user.Current()
|
||||||
|
userid , _ := strconv.Atoi(user.Uid)
|
||||||
|
job_desc.User_id= uint32(userid)
|
||||||
|
groupid , _ := strconv.Atoi(user.Gid)
|
||||||
|
|
||||||
|
job_desc.Group_id= uint32(groupid)
|
||||||
|
job_desc.Name = "mpi_job"
|
||||||
|
job_desc.Partition="long"
|
||||||
|
job_desc.Time_limit = uint32(60)
|
||||||
|
job_desc.Min_nodes =uint32(2)
|
||||||
|
job_desc.Num_tasks = uint32(2)
|
||||||
|
job_desc.Std_out = ("./%j-out.txt")
|
||||||
|
job_desc.Std_err = ("./%j-err.txt")
|
||||||
|
job_desc.Work_dir = dir
|
||||||
|
|
||||||
|
answer := submit_job.Submit_job(&job_desc)
|
||||||
|
if(answer.Error_code != 0) {
|
||||||
|
msg := slurm.GetErrorString(answer.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Printf("Submitted Job %d\n", answer.Job_id)
|
||||||
|
|
||||||
|
|
||||||
|
job_list := job_info.Get_job(answer.Job_id)
|
||||||
|
if job_list.Error_code != 0 {
|
||||||
|
msg := slurm.GetErrorString(job_list.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
job := job_list.Job_list[0]
|
||||||
|
|
||||||
|
fmt.Printf("job %d is %s\n",answer.Job_id, job.Job_stateS)
|
||||||
|
state := job.Job_stateS
|
||||||
|
for state == "Pending" || state == "Running" {
|
||||||
|
time.Sleep(2 * time.Second)
|
||||||
|
job_list = job_info.Get_job(answer.Job_id)
|
||||||
|
if job_list.Error_code != 0 {
|
||||||
|
msg := slurm.GetErrorString(job_list.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
job = job_list.Job_list[0]
|
||||||
|
|
||||||
|
state = job.Job_stateS
|
||||||
|
|
||||||
|
fmt.Printf("job %d is %s\n",answer.Job_id, job.Job_stateS)
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Total runtime Job %d: %s\n",job.Job_id, job_info.Get_job_runtime(job).String() )
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,162 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "slurm/submitjob"
|
||||||
|
import "slurm"
|
||||||
|
import "os/user"
|
||||||
|
import "os"
|
||||||
|
import "strconv"
|
||||||
|
import "fmt"
|
||||||
|
import "os/exec"
|
||||||
|
import "path/filepath"
|
||||||
|
import "slurm/jobinfo"
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
|
||||||
|
func fileExists(filename string) bool {
|
||||||
|
info, err := os.Stat(filename)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return !info.IsDir()
|
||||||
|
}
|
||||||
|
// build_container builds a singularity image from a definition file via
// "sudo singularity build". A build failure is reported (with the captured
// build output) but deliberately not fatal: the caller re-checks the image.
func build_container(file_name, container_name string) {
	builder := exec.Command("sudo", "/usr/local/bin/singularity", "build", container_name, file_name)
	fmt.Print("Now build new container")
	fmt.Printf("%s\n", builder.String())
	output, buildErr := builder.CombinedOutput()
	if buildErr != nil {
		fmt.Printf("error in creating container %s \n", buildErr)
		fmt.Printf("%s\n", output)
		// return
	}
	fmt.Printf("%s\n", output)
}
|
||||||
|
|
||||||
|
func main(){
|
||||||
|
job_desc := submit_job.Job_descriptor{}
|
||||||
|
|
||||||
|
dir, _ := os.Getwd()
|
||||||
|
container := filepath.Join(dir, "openmp_container.img")
|
||||||
|
definition := filepath.Join(dir, "openmp_container.def")
|
||||||
|
if !fileExists(container){
|
||||||
|
build_container(definition,container)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !fileExists(container){
|
||||||
|
return
|
||||||
|
}
|
||||||
|
/* use Cmd to create our script */
|
||||||
|
|
||||||
|
job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n hostname \n"
|
||||||
|
job_desc.Script+= "export OMP_NUM_THREADS=$SLURM_JOB_CPUS_PER_NODE\n"
|
||||||
|
cmd := exec.Command( "/usr/local/bin/singularity", "exec",container, "/opt/openmp_example" )
|
||||||
|
|
||||||
|
job_desc.Script+= cmd.String()
|
||||||
|
fmt.Printf("cmd %s\n", job_desc.Script)
|
||||||
|
user, _:= user.Current()
|
||||||
|
userid , _ := strconv.Atoi(user.Uid)
|
||||||
|
job_desc.User_id= uint32(userid)
|
||||||
|
groupid , _ := strconv.Atoi(user.Gid)
|
||||||
|
|
||||||
|
job_desc.Group_id= uint32(groupid)
|
||||||
|
job_desc.Name = "test_job"
|
||||||
|
job_desc.Partition="long"
|
||||||
|
job_desc.Time_limit = uint32(60)
|
||||||
|
job_desc.Min_nodes =uint32(1)
|
||||||
|
job_desc.Num_tasks = uint32(1)
|
||||||
|
|
||||||
|
job_desc.Cpus_per_task = uint16(2)
|
||||||
|
job_desc.Std_out = ("./%j-out.txt")
|
||||||
|
job_desc.Std_err = ("./%j-err.txt")
|
||||||
|
job_desc.Work_dir = dir
|
||||||
|
|
||||||
|
answer := submit_job.Submit_job(&job_desc)
|
||||||
|
if(answer.Error_code != 0) {
|
||||||
|
msg := slurm.GetErrorString(answer.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Printf("Submitted Job %d\n", answer.Job_id)
|
||||||
|
|
||||||
|
/*Now, we submit the same jon again, ut with some oversubsciption */
|
||||||
|
job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n hostname \n"
|
||||||
|
job_desc.Script+= "export OMP_NUM_THREADS=4\n"
|
||||||
|
|
||||||
|
job_desc.Script+= cmd.String()
|
||||||
|
fmt.Printf("cmd %s\n", job_desc.Script)
|
||||||
|
answer2 := submit_job.Submit_job(&job_desc)
|
||||||
|
if(answer2.Error_code != 0) {
|
||||||
|
msg := slurm.GetErrorString(answer.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Printf("Submitted Job %d\n", answer2.Job_id)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
job_list := job_info.Get_job(answer.Job_id)
|
||||||
|
if job_list.Error_code != 0 {
|
||||||
|
msg := slurm.GetErrorString(job_list.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
job := job_list.Job_list[0]
|
||||||
|
|
||||||
|
fmt.Printf("job is %s\n",job.Job_stateS)
|
||||||
|
state := job.Job_stateS
|
||||||
|
for state == "Pending" || state == "Running" {
|
||||||
|
time.Sleep(2 * time.Second)
|
||||||
|
job_list = job_info.Get_job(answer.Job_id)
|
||||||
|
if job_list.Error_code != 0 {
|
||||||
|
msg := slurm.GetErrorString(job_list.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
job = job_list.Job_list[0]
|
||||||
|
|
||||||
|
state = job.Job_stateS
|
||||||
|
|
||||||
|
fmt.Printf("job is %s\n",job.Job_stateS)
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Total runtime first job %s\n",job_info.Get_job_runtime(job).String() )
|
||||||
|
/*wait for second job */
|
||||||
|
job_list = job_info.Get_job(answer2.Job_id)
|
||||||
|
if job_list.Error_code != 0 {
|
||||||
|
msg := slurm.GetErrorString(job_list.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
job = job_list.Job_list[0]
|
||||||
|
|
||||||
|
fmt.Printf("job is %s\n",job.Job_stateS)
|
||||||
|
state = job.Job_stateS
|
||||||
|
for state == "Pending" || state == "Running" {
|
||||||
|
time.Sleep(2 * time.Second)
|
||||||
|
job_list = job_info.Get_job(answer2.Job_id)
|
||||||
|
if job_list.Error_code != 0 {
|
||||||
|
msg := slurm.GetErrorString(job_list.Error_code)
|
||||||
|
fmt.Printf("Error: %s\n" ,msg)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
job = job_list.Job_list[0]
|
||||||
|
|
||||||
|
state = job.Job_stateS
|
||||||
|
|
||||||
|
fmt.Printf("job is %s\n",job.Job_stateS)
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fmt.Printf("Total runtime second job %s\n",job_info.Get_job_runtime(job).String() )
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
// Command update_job moves a (pending) job to a new QOS and partition:
//
//	./update_job JobId qos partition
package main

import "slurm/submitjob"
import "slurm"
import "os"
import "strconv"
import "fmt"

func main() {
	if len(os.Args) < 4 {
		// Fixed typo in the user-visible message: "Synthax" -> "Syntax".
		fmt.Printf("Syntax: specify JobID, qos and partition \n")
		return
	}

	var ops submit_job.Update_job_options
	id, err := strconv.Atoi(os.Args[1])
	if err != nil {
		fmt.Printf("Invalid job id (no int) %s\n", os.Args[1])
		return
	}

	ops.Qos = os.Args[2]
	ops.Partition = os.Args[3]

	err2 := submit_job.Update_job(ops, uint32(id))
	if err2 != uint32(0) {
		fmt.Printf("error %s \n", slurm.GetErrorString(err2))
	}
}
|
Loading…
Reference in New Issue