now the hibridon benchmark runs one job for each machine type on physix

- also tuned memory requirements so that the `representative_test` succeeds
This commit is contained in:
Guillaume Raffy 2022-06-07 19:01:30 +02:00
parent 936dfa793a
commit f2b8d6cdb4
1 changed file with 121 additions and 6 deletions

View File

@ -38,14 +38,104 @@ function launch_job_for_host_group()
local hosts='' local hosts=''
local num_cores='' local num_cores=''
case "${host_group_id}" in case "${host_group_id}" in
'intel_xeon_x5550')
hosts="${hosts}physix48.ipr.univ-rennes1.fr"
num_cores='8'
;;
'intel_xeon_x5650')
hosts="${hosts}physix49.ipr.univ-rennes1.fr"
hosts="${hosts}|physix50.ipr.univ-rennes1.fr"
hosts="${hosts}|physix51.ipr.univ-rennes1.fr"
hosts="${hosts}|physix52.ipr.univ-rennes1.fr"
hosts="${hosts}|physix53.ipr.univ-rennes1.fr"
hosts="${hosts}|physix54.ipr.univ-rennes1.fr"
hosts="${hosts}|physix55.ipr.univ-rennes1.fr"
hosts="${hosts}|physix56.ipr.univ-rennes1.fr"
hosts="${hosts}|physix57.ipr.univ-rennes1.fr"
hosts="${hosts}|physix58.ipr.univ-rennes1.fr"
hosts="${hosts}|physix59.ipr.univ-rennes1.fr"
num_cores='12'
;;
'intel_xeon_e5-2660')
hosts="${hosts}physix60.ipr.univ-rennes1.fr"
hosts="${hosts}|physix61.ipr.univ-rennes1.fr"
hosts="${hosts}|physix62.ipr.univ-rennes1.fr"
hosts="${hosts}|physix63.ipr.univ-rennes1.fr"
hosts="${hosts}|physix64.ipr.univ-rennes1.fr"
hosts="${hosts}|physix65.ipr.univ-rennes1.fr"
hosts="${hosts}|physix66.ipr.univ-rennes1.fr"
hosts="${hosts}|physix67.ipr.univ-rennes1.fr"
hosts="${hosts}|physix68.ipr.univ-rennes1.fr"
hosts="${hosts}|physix69.ipr.univ-rennes1.fr"
hosts="${hosts}|physix70.ipr.univ-rennes1.fr"
hosts="${hosts}|physix71.ipr.univ-rennes1.fr"
num_cores='16'
;;
'intel_xeon_e5-2660v2')
hosts="${hosts}physix72.ipr.univ-rennes1.fr"
hosts="${hosts}|physix73.ipr.univ-rennes1.fr"
hosts="${hosts}|physix74.ipr.univ-rennes1.fr"
hosts="${hosts}|physix75.ipr.univ-rennes1.fr"
hosts="${hosts}|physix76.ipr.univ-rennes1.fr"
hosts="${hosts}|physix77.ipr.univ-rennes1.fr"
hosts="${hosts}|physix78.ipr.univ-rennes1.fr"
hosts="${hosts}|physix79.ipr.univ-rennes1.fr"
hosts="${hosts}|physix80.ipr.univ-rennes1.fr"
hosts="${hosts}|physix81.ipr.univ-rennes1.fr"
hosts="${hosts}|physix82.ipr.univ-rennes1.fr"
hosts="${hosts}|physix84.ipr.univ-rennes1.fr"
num_cores='20'
;;
'intel_xeon_e5-2660v4')
hosts="${hosts}physix84.ipr.univ-rennes1.fr"
hosts="${hosts}|physix85.ipr.univ-rennes1.fr"
hosts="${hosts}|physix86.ipr.univ-rennes1.fr"
hosts="${hosts}|physix87.ipr.univ-rennes1.fr"
num_cores='28'
;;
'intel_xeon_gold_6140') 'intel_xeon_gold_6140')
hosts="${hosts}physix88.ipr.univ-rennes1.fr" hosts="${hosts}physix88.ipr.univ-rennes1.fr"
hosts="${hosts}|physix89.ipr.univ-rennes1.fr" hosts="${hosts}|physix89.ipr.univ-rennes1.fr"
num_cores='36' num_cores='36'
;; ;;
'intel_xeon_x5550') 'intel_xeon_gold_6154')
hosts="${hosts}physix48.ipr.univ-rennes1.fr" hosts="${hosts}physix90.ipr.univ-rennes1.fr"
num_cores='8' num_cores='72'
;;
'intel_xeon_gold_5222')
hosts="${hosts}physix92.ipr.univ-rennes1.fr"
num_cores='4'
;;
'intel_xeon_gold_6226r')
hosts="${hosts}physix93.ipr.univ-rennes1.fr"
hosts="${hosts}|physix94.ipr.univ-rennes1.fr"
num_cores='32'
;;
'intel_xeon_gold_6240r')
hosts="${hosts}physix99.ipr.univ-rennes1.fr"
num_cores='48'
;;
'intel_xeon_gold_6248r')
hosts="${hosts}physix95.ipr.univ-rennes1.fr"
hosts="${hosts}|physix96.ipr.univ-rennes1.fr"
hosts="${hosts}|physix97.ipr.univ-rennes1.fr"
hosts="${hosts}|physix98.ipr.univ-rennes1.fr"
hosts="${hosts}|physix99.ipr.univ-rennes1.fr"
hosts="${hosts}|physix100.ipr.univ-rennes1.fr"
hosts="${hosts}|physix101.ipr.univ-rennes1.fr"
hosts="${hosts}|physix102.ipr.univ-rennes1.fr"
num_cores='48'
;;
'amd_epyc_7282')
hosts="${hosts}physix12.ipr.univ-rennes1.fr"
hosts="${hosts}|physix13.ipr.univ-rennes1.fr"
hosts="${hosts}|physix14.ipr.univ-rennes1.fr"
hosts="${hosts}|physix15.ipr.univ-rennes1.fr"
num_cores='32'
;; ;;
*) *)
error "unhandled host_group_id : ${host_group_id}" error "unhandled host_group_id : ${host_group_id}"
@ -53,9 +143,22 @@ function launch_job_for_host_group()
;; ;;
esac esac
quick_test='arch4_quick' # about 2s on a core i5 8th generation quick_test='arch4_quick' # about 2s on a core i5 8th generation
representative_test='nh3h2_qma_long' # about 10min on a core i5 8th generation representative_test='nh3h2_qma_long' # about 10min on a core i5 8th generation
benchmark_test="${quick_test}" benchmark_test="${representative_test}"
case "${benchmark_test}" in
'arch4_quick')
ram_per_core='1G'
;;
'nh3h2_qma_long')
ram_per_core='2.8G' # this was enough on physix48, but maybe we can reduce more
;;
*)
error "unhandled benchmark_test : ${benchmark_test}"
exit 1
;;
esac
git_repos_url="https://github.com/hibridon/hibridon" git_repos_url="https://github.com/hibridon/hibridon"
git_user='g-raffy' # os.environ['HIBRIDON_REPOS_USER'] git_user='g-raffy' # os.environ['HIBRIDON_REPOS_USER']
@ -70,7 +173,7 @@ function launch_job_for_host_group()
local hibench_root_dir="$GLOBAL_WORK_DIR/graffy/hibridon/benchmarks/starbench" local hibench_root_dir="$GLOBAL_WORK_DIR/graffy/hibridon/benchmarks/starbench"
mkdir -p "${hibench_root_dir}" mkdir -p "${hibench_root_dir}"
local this_bench_dir="${hibench_root_dir}/${hibridon_version}/${benchmark_test}/${host_group_id}" local this_bench_dir="${hibench_root_dir}/${hibridon_version}/${benchmark_test}/${host_group_id}/$(date --iso-8601=seconds)"
mkdir -p "${this_bench_dir}" mkdir -p "${this_bench_dir}"
command="/tmp/hibench.job \"${git_repos_url}\" \"${git_user}\" \"${git_pass_file}\" \"${hibridon_version}\" \"${cmake_options}\" \"${benchmark_command}\"" command="/tmp/hibench.job \"${git_repos_url}\" \"${git_user}\" \"${git_pass_file}\" \"${hibridon_version}\" \"${cmake_options}\" \"${benchmark_command}\""
@ -84,7 +187,7 @@ function launch_job_for_host_group()
qsub_command="${qsub_command} -l \"hostname=${hosts}\"" qsub_command="${qsub_command} -l \"hostname=${hosts}\""
qsub_command="${qsub_command} -cwd" qsub_command="${qsub_command} -cwd"
qsub_command="${qsub_command} -m ae" qsub_command="${qsub_command} -m ae"
qsub_command="${qsub_command} -l mem_available=1G" qsub_command="${qsub_command} -l mem_available=${ram_per_core}"
qsub_command="${qsub_command} -N ${benchmark_test}_${host_group_id}" qsub_command="${qsub_command} -N ${benchmark_test}_${host_group_id}"
qsub_command="${qsub_command} ${command}" qsub_command="${qsub_command} ${command}"
# qsub -pe smp "$num_cores" -l "hostname=${hosts}" # qsub -pe smp "$num_cores" -l "hostname=${hosts}"
@ -99,6 +202,18 @@ function launch_perf_jobs()
local hibridon_version="$1" # the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad' local hibridon_version="$1" # the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
launch_job_for_host_group "${hibridon_version}" 'intel_xeon_x5550' launch_job_for_host_group "${hibridon_version}" 'intel_xeon_x5550'
launch_job_for_host_group "${hibridon_version}" 'intel_xeon_x5650'
launch_job_for_host_group "${hibridon_version}" 'intel_xeon_e5-2660'
launch_job_for_host_group "${hibridon_version}" 'intel_xeon_e5-2660v2'
launch_job_for_host_group "${hibridon_version}" 'intel_xeon_e5-2660v4'
launch_job_for_host_group "${hibridon_version}" 'intel_xeon_gold_6140'
launch_job_for_host_group "${hibridon_version}" 'intel_xeon_gold_6154'
launch_job_for_host_group "${hibridon_version}" 'intel_xeon_gold_5222'
launch_job_for_host_group "${hibridon_version}" 'intel_xeon_gold_6226r'
launch_job_for_host_group "${hibridon_version}" 'intel_xeon_gold_6240r'
launch_job_for_host_group "${hibridon_version}" 'intel_xeon_gold_6248r'
launch_job_for_host_group "${hibridon_version}" 'amd_epyc_7282'
#launch_job_for_host_group "${hibridon_version}" 'intel_xeon_gold_6140' #launch_job_for_host_group "${hibridon_version}" 'intel_xeon_gold_6140'
} }