Made the changes needed to get the hibridon benchmark running on physix (IPR's cluster):
- added to `starbench` an option to choose which cmake executable to use
- fixed typos in the SGE environment variable names (`JOB_ID`, `TMPDIR`)
- added a job submission mechanism (via `qsub`)

With these changes, hibridon's benchmark succeeded on physix48.
This commit is contained in:
		
							parent
							
								
									6715cd1714
								
							
						
					
					
						commit
						936dfa793a
					
				|  | @ -8,16 +8,22 @@ code_version="$4"  # git branch id or commit id eg : 'a3bed1c3ccfbca572003020d3e | |||
| cmake_options="$5"  # eg '-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON' | ||||
| benchmark_command="$6"  # eg 'ctest -L ^arch4_quick$' | ||||
| 
 | ||||
| if [ "${JOBID}" = '' ] | ||||
| cmake_path='/opt/cmake/cmake-3.23.0/bin/cmake' | ||||
| 
 | ||||
| if [ "${JOB_ID}" = '' ] | ||||
| then | ||||
| 	# this script is not executed by sge... set dummy values for test | ||||
| 	TMP_DIR=/tmp | ||||
| 	JOBID=666666 | ||||
| 	TMPDIR=/tmp | ||||
| 	JOB_ID=666666 | ||||
| 	NSLOTS=2 | ||||
| fi | ||||
| 
 | ||||
| echo "Executing on $(hostname)" | ||||
| temp_dir=${TMP_DIR}/$(whoami)/${JOBID} | ||||
| echo "Executing job ${JOB_ID} on $(hostname)" | ||||
| temp_dir=${TMPDIR}/$(whoami)/${JOB_ID} | ||||
| if [ -d "${temp_dir}" ] | ||||
| then | ||||
| 	rm -Rf "${temp_dir}" | ||||
| fi | ||||
| mkdir -p "${temp_dir}" | ||||
| 
 | ||||
| starbench_path="${temp_dir}/starbench.py" | ||||
|  | @ -38,6 +44,7 @@ command="${command} --git-pass-file ${git_pass_file}" | |||
| command="${command} --num-cores ${num_cores}" | ||||
| command="${command} --output-dir ${output_dir}" | ||||
| command="${command} --code-version ${code_version}" | ||||
| command="${command} --cmake-path ${cmake_path}" | ||||
| # echo "cmake_options: @$cmake_options@" | ||||
| for cmake_option in ${cmake_options} | ||||
| do | ||||
|  |  | |||
|  | @ -1,6 +1,23 @@ | |||
| #!/usr/bin/env bash | ||||
| # this script launches jobs to run hibridon benchmarks on physix cluster for the given version of hibridon (commit number) | ||||
| 
 | ||||
| function show_usage() | ||||
| { | ||||
| 	echo "launches hibridon benchmark jobs on IPR's physix cluster" | ||||
| 	echo | ||||
| 	echo "syntax :" | ||||
| 	echo "    $0 <hibridon_version>" | ||||
| 	echo | ||||
| 	echo "example:" | ||||
| 	echo "    $0 a3bed1c3ccfbca572003020d3e3d3b1ff3934fad" | ||||
| } | ||||
| 
 | ||||
| if [ "$#" != "1" ] | ||||
| then | ||||
| 	show_usage | ||||
| 	exit 1 | ||||
| fi | ||||
| 
 | ||||
| HIBRIDON_VERSION="$1"  # the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad' | ||||
| # 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'  # latest from branch master as of 01/06/2022 12:52 | ||||
| # code_version='775048db02dfb317d5eaddb6d6db520be71a2fdf'  # latest from branch graffy-issue51 as of 01/06/ | ||||
|  | @ -21,36 +38,59 @@ function launch_job_for_host_group() | |||
| 	local hosts='' | ||||
| 	local num_cores='' | ||||
| 	case "${host_group_id}" in | ||||
| 		'xeon_gold_6140') | ||||
| 			hosts='\ | ||||
| 				physix88.ipr.univ-rennes1.fr|\ | ||||
| 				physix89.ipr.univ-rennes1.fr' | ||||
| 		'intel_xeon_gold_6140') | ||||
| 			hosts="${hosts}physix88.ipr.univ-rennes1.fr" | ||||
| 			hosts="${hosts}|physix89.ipr.univ-rennes1.fr" | ||||
| 			num_cores='36' | ||||
| 			;; | ||||
| 		'intel_xeon_x5550') | ||||
| 			hosts="${hosts}physix48.ipr.univ-rennes1.fr" | ||||
| 			num_cores='8' | ||||
| 			;; | ||||
| 		*) | ||||
| 			error "unhandled host_group_id : ${host_group_id}" | ||||
| 			exit 1 | ||||
| 			;; | ||||
| 	esac | ||||
| 
 | ||||
| 	quick_test='^arch4_quick$'  # about 2s on a core i5 8th generation | ||||
| 	benchmark_test='^nh3h2_qma_long$'  # about 10min on a core i5 8th generation | ||||
| 	quick_test='arch4_quick'  # about 2s on a core i5 8th generation | ||||
| 	representative_test='nh3h2_qma_long'  # about 10min on a core i5 8th generation | ||||
| 	benchmark_test="${quick_test}" | ||||
| 
 | ||||
| 	git_repos_url="https://github.com/hibridon/hibridon" | ||||
| 	git_user='g-raffy'  # os.environ['HIBRIDON_REPOS_USER'] | ||||
| 	git_pass_file="$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" | ||||
| 	cmake_options='-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON' | ||||
| 	benchmark_command="ctest --output-on-failure -L ${quick_test}" | ||||
| 	benchmark_command="ctest --output-on-failure -L ^${benchmark_test}\$" | ||||
| 
 | ||||
| 	# cat $SCRIPT_DIR/hibench.job | sed "s~<include:starbench.py>~$(cat $SCRIPT_DIR/starbench.py)~" > /tmp/hibench.job | ||||
| 	cat $SCRIPT_DIR/hibench.job | substitute_TAG_with_FILEcontents '<include:starbench.py>' "$SCRIPT_DIR/starbench.py" > /tmp/hibench.job | ||||
| 	chmod a+x /tmp/hibench.job | ||||
| 
 | ||||
| 	command="/tmp/hibench.job \"${git_repos_url}\" \"${git_user}\" \"${git_pass_file}\" \"${HIBRIDON_VERSION}\" \"${cmake_options}\" \"${benchmark_command}\"" | ||||
| 	echo "command = $command" | ||||
| 	eval $command | ||||
| 	local hibench_root_dir="$GLOBAL_WORK_DIR/graffy/hibridon/benchmarks/starbench" | ||||
| 	mkdir -p "${hibench_root_dir}" | ||||
| 
 | ||||
| 	# qsub -pe smp "$num_cores" -l "hostname=${hosts}"  | ||||
| 	local this_bench_dir="${hibench_root_dir}/${hibridon_version}/${benchmark_test}/${host_group_id}" | ||||
| 	mkdir -p "${this_bench_dir}" | ||||
| 
 | ||||
| 	command="/tmp/hibench.job \"${git_repos_url}\" \"${git_user}\" \"${git_pass_file}\" \"${hibridon_version}\" \"${cmake_options}\" \"${benchmark_command}\"" | ||||
| 	echo "command = $command" | ||||
| 		# eval $command | ||||
| 
 | ||||
| 	pushd "${this_bench_dir}" | ||||
| 
 | ||||
| 		qsub_command="qsub" | ||||
| 		qsub_command="${qsub_command} -pe smp ${num_cores}" | ||||
| 		qsub_command="${qsub_command} -l \"hostname=${hosts}\"" | ||||
| 		qsub_command="${qsub_command} -cwd" | ||||
| 		qsub_command="${qsub_command} -m ae" | ||||
| 		qsub_command="${qsub_command} -l mem_available=1G" | ||||
| 		qsub_command="${qsub_command} -N ${benchmark_test}_${host_group_id}" | ||||
| 		qsub_command="${qsub_command} ${command}" | ||||
| 		# qsub -pe smp "$num_cores" -l "hostname=${hosts}"  | ||||
| 		echo "qsub_command = $qsub_command" | ||||
| 		eval $qsub_command | ||||
| 	popd | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
|  | @ -58,7 +98,8 @@ function launch_perf_jobs() | |||
| { | ||||
| 	local hibridon_version="$1" # the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad' | ||||
| 	 | ||||
| 	launch_job_for_host_group "${hibridon_version}" 'xeon_gold_6140' | ||||
| 	launch_job_for_host_group "${hibridon_version}" 'intel_xeon_x5550' | ||||
| 	#launch_job_for_host_group "${hibridon_version}" 'intel_xeon_gold_6140' | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -186,13 +186,15 @@ class StarBencher(): | |||
|             self._runs[run.id] = run | ||||
| 
 | ||||
|     def run(self): | ||||
|         print("executing the following command in parallel (%d parallel runs) : '%s'" % (self.num_parallel_runs, str(self.run_command))) | ||||
|         for worker_id in range(self.num_parallel_runs): | ||||
|             self._start_run(worker_id) | ||||
|         # wait until all runs have finished | ||||
|         self._finished_event.wait() | ||||
|         with self._runs_lock: | ||||
|             if not all([run.return_code == 0 for run in self._runs.values()]): | ||||
|                 raise Exception('at least one run failed') | ||||
|             workers_success = [run.return_code == 0 for run in self._runs.values()] | ||||
|             if not all(workers_success): | ||||
|                 raise Exception('at least one run failed (workers_success = %s)' % workers_success) | ||||
|         mean_duration, num_runs = self._get_run_mean_duration() | ||||
|         print('mean duration : %.3f s (%d runs)' % (mean_duration, num_runs)) | ||||
|         return mean_duration | ||||
|  | @ -214,7 +216,7 @@ def test_starbencher(): | |||
| # end of starbencher | ||||
| 
 | ||||
| 
 | ||||
| def starbench_cmake_app(git_repos_url: str, code_version: str, tmp_dir: Path, num_cores: int, git_user: str, git_password: str, benchmark_command: List[str], cmake_options: List[str] = None): | ||||
| def starbench_cmake_app(git_repos_url: str, code_version: str, tmp_dir: Path, num_cores: int, git_user: str, git_password: str, benchmark_command: List[str], cmake_options: List[str] = None, cmake_exe_location: Path = None): | ||||
|     """ | ||||
|     tests_to_run : regular expression as understood by ctest's -L option. eg '^arch4_quick$' | ||||
|     """ | ||||
|  | @ -247,8 +249,11 @@ def starbench_cmake_app(git_repos_url: str, code_version: str, tmp_dir: Path, nu | |||
|     # build_dir.mkdir(exist_ok=True) | ||||
| 
 | ||||
|     print('configuring %s into %s ...' % (src_dir, build_dir)) | ||||
|     cmake_prog = 'cmake' | ||||
|     if cmake_exe_location: | ||||
|         cmake_prog = str(cmake_exe_location) | ||||
|     configure = StarBencher( | ||||
|         run_command=['cmake'] + cmake_options + [src_dir], | ||||
|         run_command=[cmake_prog] + cmake_options + [src_dir], | ||||
|         num_cores_per_run=1, | ||||
|         num_parallel_runs=num_cores, | ||||
|         max_num_cores=num_cores, | ||||
|  | @ -289,7 +294,7 @@ if __name__ == '__main__': | |||
| 
 | ||||
|     example_text = '''example: | ||||
| 
 | ||||
|     %(prog)s --git-repos-url https://github.com/hibridon/hibridon --code-version a3bed1c3ccfbca572003020d3e3d3b1ff3934fad --git-user g-raffy --git-pass-file "$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" --num-cores 2 --output-dir=/tmp/hibench --cmake-option=-DCMAKE_BUILD_TYPE=Release --cmake-option=-DBUILD_TESTING=ON --benchmark-command='ctest --output-on-failure -L ^arch4_quick$' | ||||
|     %(prog)s --git-repos-url https://github.com/hibridon/hibridon --code-version a3bed1c3ccfbca572003020d3e3d3b1ff3934fad --git-user g-raffy --git-pass-file "$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" --num-cores 2 --output-dir=/tmp/hibench --cmake-path=/opt/cmake/cmake-3.23.0/bin/cmake --cmake-option=-DCMAKE_BUILD_TYPE=Release --cmake-option=-DBUILD_TESTING=ON --benchmark-command='ctest --output-on-failure -L ^arch4_quick$' | ||||
| 
 | ||||
|     ''' | ||||
| 
 | ||||
|  | @ -302,6 +307,7 @@ if __name__ == '__main__': | |||
|     password_group.add_argument('--git-pass', type=str, help='the password (or personal access token) to use (not recommended for security reasons)') | ||||
|     parser.add_argument('--num-cores', type=int, required=True, help='the number of cores that the benchmark will use') | ||||
|     parser.add_argument('--output-dir', type=Path, required=True, help='where the output files will be placed') | ||||
|     parser.add_argument('--cmake-path', type=Path, help='the path to the cmake executable to use in case a specific cmake is wanted') | ||||
|     parser.add_argument('--cmake-option', type=str, action='append', help='additional option passed to cmake in the configure step (use this flag multiple times if you need more than one cmake option)') | ||||
|     parser.add_argument('--benchmark-command', required=True, type=str, help='the command to benchmark') | ||||
|     args = parser.parse_args() | ||||
|  | @ -316,4 +322,4 @@ if __name__ == '__main__': | |||
|         with open(args.git_pass_file, 'r') as f: | ||||
|             git_password = f.readline().replace('\n', '')  # os.environ['HIBRIDON_REPOS_PAT'] | ||||
| 
 | ||||
|     starbench_cmake_app(git_repos_url=git_repos_url, code_version=args.code_version, tmp_dir=args.output_dir, num_cores=args.num_cores, git_user=git_user, git_password=git_password, cmake_options=args.cmake_option, benchmark_command=args.benchmark_command.split(' ')) | ||||
|     starbench_cmake_app(git_repos_url=git_repos_url, code_version=args.code_version, tmp_dir=args.output_dir, num_cores=args.num_cores, git_user=git_user, git_password=git_password, cmake_options=args.cmake_option, benchmark_command=args.benchmark_command.split(' '), cmake_exe_location=args.cmake_path) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue