Made the changes needed to get the hibridon benchmark running on physix (IPR's cluster):

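- added an option to `starbench` to choose which cmake executable to use
- fixed typos in the names of the SGE environment variables (`JOBID` -> `JOB_ID`, `TMP_DIR` -> `TMPDIR`)
- added a job submission mechanism (jobs are now submitted with `qsub`)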

With these changes, hibridon's benchmark succeeded on physix48.
This commit is contained in:
Guillaume Raffy 2022-06-07 14:52:56 +02:00
parent 6715cd1714
commit 936dfa793a
3 changed files with 77 additions and 23 deletions

View File

@ -8,16 +8,22 @@ code_version="$4" # git branch id or commit id eg : 'a3bed1c3ccfbca572003020d3e
cmake_options="$5" # eg '-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON' cmake_options="$5" # eg '-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON'
benchmark_command="$6" # eg 'ctest -L ^arch4_quick$' benchmark_command="$6" # eg 'ctest -L ^arch4_quick$'
if [ "${JOBID}" = '' ] cmake_path='/opt/cmake/cmake-3.23.0/bin/cmake'
if [ "${JOB_ID}" = '' ]
then then
# this script is not executed by sge... set dummy values for test # this script is not executed by sge... set dummy values for test
TMP_DIR=/tmp TMPDIR=/tmp
JOBID=666666 JOB_ID=666666
NSLOTS=2 NSLOTS=2
fi fi
echo "Executing on $(hostname)" echo "Executing job ${JOB_ID} on $(hostname)"
temp_dir=${TMP_DIR}/$(whoami)/${JOBID} temp_dir=${TMPDIR}/$(whoami)/${JOB_ID}
if [ -d "${temp_dir}" ]
then
rm -Rf "${temp_dir}"
fi
mkdir -p "${temp_dir}" mkdir -p "${temp_dir}"
starbench_path="${temp_dir}/starbench.py" starbench_path="${temp_dir}/starbench.py"
@ -38,6 +44,7 @@ command="${command} --git-pass-file ${git_pass_file}"
command="${command} --num-cores ${num_cores}" command="${command} --num-cores ${num_cores}"
command="${command} --output-dir ${output_dir}" command="${command} --output-dir ${output_dir}"
command="${command} --code-version ${code_version}" command="${command} --code-version ${code_version}"
command="${command} --cmake-path ${cmake_path}"
# echo "cmake_options: @$cmake_options@" # echo "cmake_options: @$cmake_options@"
for cmake_option in ${cmake_options} for cmake_option in ${cmake_options}
do do

View File

@ -1,6 +1,23 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# this script launches jobs to run hibridon benchmarks on physix cluster for the given version of hibridon (commit number) # this script launches jobs to run hibridon benchmarks on physix cluster for the given version of hibridon (commit number)
# Prints this launcher's help text (purpose, syntax, one example) on stdout.
# Takes no arguments; "$0" is expanded so the help shows the name the script was invoked with.
function show_usage()
{
    # one printf argument per output line; the empty strings produce the blank separator lines
    printf '%s\n' \
        "launches hibridon benchmark jobs on IPR's physix cluster" \
        '' \
        'syntax :' \
        " $0 <hibridon_version>" \
        '' \
        'example:' \
        " $0 a3bed1c3ccfbca572003020d3e3d3b1ff3934fad"
}
# exactly one positional argument (the hibridon version) is expected;
# with any other count, print the help text and abort with status 1
if [ "$#" -ne 1 ]; then
    show_usage
    exit 1
fi
HIBRIDON_VERSION="$1" # the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad' HIBRIDON_VERSION="$1" # the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
# 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad' # latest from branch master as of 01/06/2022 12:52 # 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad' # latest from branch master as of 01/06/2022 12:52
# code_version='775048db02dfb317d5eaddb6d6db520be71a2fdf' # latest from branch graffy-issue51 as of 01/06/ # code_version='775048db02dfb317d5eaddb6d6db520be71a2fdf' # latest from branch graffy-issue51 as of 01/06/
@ -21,36 +38,59 @@ function launch_job_for_host_group()
local hosts='' local hosts=''
local num_cores='' local num_cores=''
case "${host_group_id}" in case "${host_group_id}" in
'xeon_gold_6140') 'intel_xeon_gold_6140')
hosts='\ hosts="${hosts}physix88.ipr.univ-rennes1.fr"
physix88.ipr.univ-rennes1.fr|\ hosts="${hosts}|physix89.ipr.univ-rennes1.fr"
physix89.ipr.univ-rennes1.fr'
num_cores='36' num_cores='36'
;; ;;
'intel_xeon_x5550')
hosts="${hosts}physix48.ipr.univ-rennes1.fr"
num_cores='8'
;;
*) *)
error "unhandled host_group_id : ${host_group_id}" error "unhandled host_group_id : ${host_group_id}"
exit 1 exit 1
;; ;;
esac esac
quick_test='^arch4_quick$' # about 2s on a core i5 8th generation quick_test='arch4_quick' # about 2s on a core i5 8th generation
benchmark_test='^nh3h2_qma_long$' # about 10min on a core i5 8th generation representative_test='nh3h2_qma_long' # about 10min on a core i5 8th generation
benchmark_test="${quick_test}"
git_repos_url="https://github.com/hibridon/hibridon" git_repos_url="https://github.com/hibridon/hibridon"
git_user='g-raffy' # os.environ['HIBRIDON_REPOS_USER'] git_user='g-raffy' # os.environ['HIBRIDON_REPOS_USER']
git_pass_file="$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" git_pass_file="$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat"
cmake_options='-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON' cmake_options='-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON'
benchmark_command="ctest --output-on-failure -L ${quick_test}" benchmark_command="ctest --output-on-failure -L ^${benchmark_test}\$"
# cat $SCRIPT_DIR/hibench.job | sed "s~<include:starbench.py>~$(cat $SCRIPT_DIR/starbench.py)~" > /tmp/hibench.job # cat $SCRIPT_DIR/hibench.job | sed "s~<include:starbench.py>~$(cat $SCRIPT_DIR/starbench.py)~" > /tmp/hibench.job
cat $SCRIPT_DIR/hibench.job | substitute_TAG_with_FILEcontents '<include:starbench.py>' "$SCRIPT_DIR/starbench.py" > /tmp/hibench.job cat $SCRIPT_DIR/hibench.job | substitute_TAG_with_FILEcontents '<include:starbench.py>' "$SCRIPT_DIR/starbench.py" > /tmp/hibench.job
chmod a+x /tmp/hibench.job chmod a+x /tmp/hibench.job
command="/tmp/hibench.job \"${git_repos_url}\" \"${git_user}\" \"${git_pass_file}\" \"${HIBRIDON_VERSION}\" \"${cmake_options}\" \"${benchmark_command}\"" local hibench_root_dir="$GLOBAL_WORK_DIR/graffy/hibridon/benchmarks/starbench"
echo "command = $command" mkdir -p "${hibench_root_dir}"
eval $command
# qsub -pe smp "$num_cores" -l "hostname=${hosts}" local this_bench_dir="${hibench_root_dir}/${hibridon_version}/${benchmark_test}/${host_group_id}"
mkdir -p "${this_bench_dir}"
command="/tmp/hibench.job \"${git_repos_url}\" \"${git_user}\" \"${git_pass_file}\" \"${hibridon_version}\" \"${cmake_options}\" \"${benchmark_command}\""
echo "command = $command"
# eval $command
pushd "${this_bench_dir}"
qsub_command="qsub"
qsub_command="${qsub_command} -pe smp ${num_cores}"
qsub_command="${qsub_command} -l \"hostname=${hosts}\""
qsub_command="${qsub_command} -cwd"
qsub_command="${qsub_command} -m ae"
qsub_command="${qsub_command} -l mem_available=1G"
qsub_command="${qsub_command} -N ${benchmark_test}_${host_group_id}"
qsub_command="${qsub_command} ${command}"
# qsub -pe smp "$num_cores" -l "hostname=${hosts}"
echo "qsub_command = $qsub_command"
eval $qsub_command
popd
} }
@ -58,7 +98,8 @@ function launch_perf_jobs()
{ {
local hibridon_version="$1" # the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad' local hibridon_version="$1" # the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
launch_job_for_host_group "${hibridon_version}" 'xeon_gold_6140' launch_job_for_host_group "${hibridon_version}" 'intel_xeon_x5550'
#launch_job_for_host_group "${hibridon_version}" 'intel_xeon_gold_6140'
} }

View File

@ -186,13 +186,15 @@ class StarBencher():
self._runs[run.id] = run self._runs[run.id] = run
def run(self): def run(self):
print("executing the following command in parallel (%d parallel runs) : '%s'" % (self.num_parallel_runs, str(self.run_command)))
for worker_id in range(self.num_parallel_runs): for worker_id in range(self.num_parallel_runs):
self._start_run(worker_id) self._start_run(worker_id)
# wait until all runs have finished # wait until all runs have finished
self._finished_event.wait() self._finished_event.wait()
with self._runs_lock: with self._runs_lock:
if not all([run.return_code == 0 for run in self._runs.values()]): workers_success = [run.return_code == 0 for run in self._runs.values()]
raise Exception('at least one run failed') if not all(workers_success):
raise Exception('at least one run failed (workers_success = %s)' % workers_success)
mean_duration, num_runs = self._get_run_mean_duration() mean_duration, num_runs = self._get_run_mean_duration()
print('mean duration : %.3f s (%d runs)' % (mean_duration, num_runs)) print('mean duration : %.3f s (%d runs)' % (mean_duration, num_runs))
return mean_duration return mean_duration
@ -214,7 +216,7 @@ def test_starbencher():
# end of starbencher # end of starbencher
def starbench_cmake_app(git_repos_url: str, code_version: str, tmp_dir: Path, num_cores: int, git_user: str, git_password: str, benchmark_command: List[str], cmake_options: List[str] = None): def starbench_cmake_app(git_repos_url: str, code_version: str, tmp_dir: Path, num_cores: int, git_user: str, git_password: str, benchmark_command: List[str], cmake_options: List[str] = None, cmake_exe_location: Path = None):
""" """
tests_to_run : regular expression as understood by ctest's -L option. eg '^arch4_quick$' tests_to_run : regular expression as understood by ctest's -L option. eg '^arch4_quick$'
""" """
@ -247,8 +249,11 @@ def starbench_cmake_app(git_repos_url: str, code_version: str, tmp_dir: Path, nu
# build_dir.mkdir(exist_ok=True) # build_dir.mkdir(exist_ok=True)
print('configuring %s into %s ...' % (src_dir, build_dir)) print('configuring %s into %s ...' % (src_dir, build_dir))
cmake_prog = 'cmake'
if cmake_exe_location:
cmake_prog = str(cmake_exe_location)
configure = StarBencher( configure = StarBencher(
run_command=['cmake'] + cmake_options + [src_dir], run_command=[cmake_prog] + cmake_options + [src_dir],
num_cores_per_run=1, num_cores_per_run=1,
num_parallel_runs=num_cores, num_parallel_runs=num_cores,
max_num_cores=num_cores, max_num_cores=num_cores,
@ -289,7 +294,7 @@ if __name__ == '__main__':
example_text = '''example: example_text = '''example:
%(prog)s --git-repos-url https://github.com/hibridon/hibridon --code-version a3bed1c3ccfbca572003020d3e3d3b1ff3934fad --git-user g-raffy --git-pass-file "$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" --num-cores 2 --output-dir=/tmp/hibench --cmake-option=-DCMAKE_BUILD_TYPE=Release --cmake-option=-DBUILD_TESTING=ON --benchmark-command='ctest --output-on-failure -L ^arch4_quick$' %(prog)s --git-repos-url https://github.com/hibridon/hibridon --code-version a3bed1c3ccfbca572003020d3e3d3b1ff3934fad --git-user g-raffy --git-pass-file "$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" --num-cores 2 --output-dir=/tmp/hibench --cmake-path=/opt/cmake/cmake-3.23.0/bin/cmake --cmake-option=-DCMAKE_BUILD_TYPE=Release --cmake-option=-DBUILD_TESTING=ON --benchmark-command='ctest --output-on-failure -L ^arch4_quick$'
''' '''
@ -302,6 +307,7 @@ if __name__ == '__main__':
password_group.add_argument('--git-pass', type=str, help='the password (or personal access token) to use (not recommended for security reasons)') password_group.add_argument('--git-pass', type=str, help='the password (or personal access token) to use (not recommended for security reasons)')
parser.add_argument('--num-cores', type=int, required=True, help='the number of cores that the benchmark will use') parser.add_argument('--num-cores', type=int, required=True, help='the number of cores that the benchmark will use')
parser.add_argument('--output-dir', type=Path, required=True, help='where the output files will be placed') parser.add_argument('--output-dir', type=Path, required=True, help='where the output files will be placed')
parser.add_argument('--cmake-path', type=Path, help='the path to the cmake executable to use in case a specific cmake is wanted')
parser.add_argument('--cmake-option', type=str, action='append', help='additional option passed to cmake in the configure step (use this flag multiple times if you need more than one cmake option)') parser.add_argument('--cmake-option', type=str, action='append', help='additional option passed to cmake in the configure step (use this flag multiple times if you need more than one cmake option)')
parser.add_argument('--benchmark-command', required=True, type=str, help='the command to benchmark') parser.add_argument('--benchmark-command', required=True, type=str, help='the command to benchmark')
args = parser.parse_args() args = parser.parse_args()
@ -316,4 +322,4 @@ if __name__ == '__main__':
with open(args.git_pass_file, 'r') as f: with open(args.git_pass_file, 'r') as f:
git_password = f.readline().replace('\n', '') # os.environ['HIBRIDON_REPOS_PAT'] git_password = f.readline().replace('\n', '') # os.environ['HIBRIDON_REPOS_PAT']
starbench_cmake_app(git_repos_url=git_repos_url, code_version=args.code_version, tmp_dir=args.output_dir, num_cores=args.num_cores, git_user=git_user, git_password=git_password, cmake_options=args.cmake_option, benchmark_command=args.benchmark_command.split(' ')) starbench_cmake_app(git_repos_url=git_repos_url, code_version=args.code_version, tmp_dir=args.output_dir, num_cores=args.num_cores, git_user=git_user, git_password=git_password, cmake_options=args.cmake_option, benchmark_command=args.benchmark_command.split(' '), cmake_exe_location=args.cmake_path)