v0.0.15
- clusterbench-submit now appends the cluster job number to the launcher, which improves traceability. Work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
This commit is contained in:
		
							parent
							
								
									263474eb5a
								
							
						
					
					
						commit
						b85f381241
					
				
							
								
								
									
										26
									
								
								README.md
								
								
								
								
							
							
						
						
									
										26
									
								
								README.md
								
								
								
								
							|  | @ -70,19 +70,19 @@ The following example command submits jobs (one job per cpu architecture) that e | |||
| INFO:root:available host groups: dict_keys(['intel_core_i5_8350u', 'intel_xeon_x5550', 'intel_xeon_x5650', 'intel_xeon_e5-2660', 'intel_xeon_e5-2660v2', 'intel_xeon_e5-2660v4', 'intel_xeon_gold_6140', 'intel_xeon_gold_6154', 'intel_xeon_gold_5220', 'intel_xeon_gold_6226r', 'intel_xeon_gold_6248r', 'intel_xeon_gold_6348', 'amd_epyc_7282', 'amd_epyc_7452']) | ||||
| INFO:root:requested host groups: ['intel_xeon_x5650'] | ||||
| DEBUG:root:iprbench_venv_hardcoded_path = /tmp/user/59825/iprbench.venv | ||||
| INFO:root:creating /opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/iprbench.venv.tgz (the virtual environment that will be used in this bench by all its jobs at some point) | ||||
| INFO:root:creating /opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00/iprbench.venv.tgz (the virtual environment that will be used in this bench by all its jobs at some point) | ||||
| Collecting virtualenv-clone | ||||
|   Using cached virtualenv_clone-0.5.7-py3-none-any.whl (6.6 kB) | ||||
| Installing collected packages: virtualenv-clone | ||||
| Successfully installed virtualenv-clone-0.5.7 | ||||
| DEBUG:root:type of resultsdb_params = <class 'dict'> | ||||
| DEBUG:root:resultsdb_params = {'type': 'tsv-files', 'tsv_results_dir': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00'} | ||||
| DEBUG:root:resultsdb_params = {"type": "tsv-files", "tsv_results_dir": "/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00"} | ||||
| DEBUG:root:tags_dict = {'<benchmark_id>': 'hibridon', '<starbench_job_path>': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/intel_xeon_x5650/starbench.job', '<iprbench_venv_hardcoded_path>': '/tmp/user/59825/iprbench.venv', '<iprbench_venv_archive_path>': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/iprbench.venv.tgz', '<benchmark_config>': '{\\"fortran_compiler\\": \\"ifort:<default>\\", \\"blas_library\\": \\"intelmkl:<default>\\", \\"test_id\\": \\"arch4_quick\\", \\"hibridon_version\\": \\"a3bed1c3ccfbca572003020d3e3d3b1ff3934fad\\", \\"cmake_path\\": \\"cmake\\", \\"num_cores\\": 12, \\"launcher\\": \\"graffy.manual\\"}', '<results_dir>': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00', '<resultsdb_params>': '{\\"type\\": \\"tsv-files\\", \\"tsv_results_dir\\": \\"/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00\\"}', '<num_cores>': '12', '<target_system_type_id>': 'fr.univ-rennes.ipr.cluster-node'} | ||||
| DEBUG:root:resultsdb_params = {'type': 'tsv-files', 'tsv_results_dir': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00'} | ||||
| DEBUG:root:resultsdb_params = {"type": "tsv-files", "tsv_results_dir": "/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00"} | ||||
| DEBUG:root:tags_dict = {'<benchmark_id>': 'hibridon', '<starbench_job_path>': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00/intel_xeon_x5650/starbench.job', '<iprbench_venv_hardcoded_path>': '/tmp/user/59825/iprbench.venv', '<iprbench_venv_archive_path>': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00/iprbench.venv.tgz', '<benchmark_config>': '{\\"fortran_compiler\\": \\"ifort:<default>\\", \\"blas_library\\": \\"intelmkl:<default>\\", \\"test_id\\": \\"arch4_quick\\", \\"hibridon_version\\": \\"a3bed1c3ccfbca572003020d3e3d3b1ff3934fad\\", \\"cmake_path\\": \\"cmake\\", \\"num_cores\\": 12, \\"launcher\\": \\"graffy.manual.alambix.job\'${JOB_ID}\'\\"}', '<results_dir>': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00', '<resultsdb_params>': '{\\"type\\": \\"tsv-files\\", \\"tsv_results_dir\\": \\"/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00\\"}', '<num_cores>': '12', '<target_system_type_id>': 'fr.univ-rennes.ipr.cluster-node'} | ||||
| DEBUG:root:ram_per_core = 1.073741824G | ||||
| DEBUG:root:qsub_args = ['-pe', 'smp', '12', '-l', '"hostname=alambix50.ipr.univ-rennes.fr"', '-S', '/bin/bash', '-cwd', '-m', 'ae', '-l', 'mem_available=1.073741824G', '-j', 'y', '-N', 'hibridon_intel_xeon_x5650'] | ||||
| DEBUG:root:qsub_command = qsub -pe smp 12 -l "hostname=alambix50.ipr.univ-rennes.fr" -S /bin/bash -cwd -m ae -l mem_available=1.073741824G -j y -N hibridon_intel_xeon_x5650 /opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/intel_xeon_x5650/starbench.job , working_dir=/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/intel_xeon_x5650 | ||||
| Your job 18879 ("hibridon_intel_xeon_x5650") has been submitted | ||||
| DEBUG:root:qsub_command = qsub -pe smp 12 -l "hostname=alambix50.ipr.univ-rennes.fr" -S /bin/bash -cwd -m ae -l mem_available=1.073741824G -j y -N hibridon_intel_xeon_x5650 /opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00/intel_xeon_x5650/starbench.job , working_dir=/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00/intel_xeon_x5650 | ||||
| Your job 18886 ("hibridon_intel_xeon_x5650") has been submitted | ||||
| ``` | ||||
| 
 | ||||
| The following command shows that the job is running | ||||
|  | @ -90,7 +90,7 @@ The following command shows that the job is running | |||
| (iprbench.venv) graffy@alambix50:/opt/ipr/cluster/work.local/graffy/bug3958/iprbench.git$ qstat | ||||
| job-ID  prior   name       user         state submit/start at     queue                          slots ja-task-ID  | ||||
| ----------------------------------------------------------------------------------------------------------------- | ||||
|   18879 0.65000 hibridon_i graffy       r     11/26/2024 18:15:32 short.q@alambix50.ipr.univ-ren    12         | ||||
|   18886 0.65000 hibridon_i graffy       r     11/26/2024 18:15:32 short.q@alambix50.ipr.univ-ren    12         | ||||
| ``` | ||||
| 
 | ||||
| the configuration of the benchmark (`--config`) is defined to run the test `arch4_quick` using the latest versions of ifort and mkl: | ||||
|  | @ -113,18 +113,18 @@ the results database backend used in the benchmark (`--resultsdb-params`) is: | |||
| ```json | ||||
| { | ||||
|   "type": "tsv-files", | ||||
|   "tsv_results_dir": "/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00" | ||||
|   "tsv_results_dir": "/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00" | ||||
| } | ||||
| ``` | ||||
| 
 | ||||
| This means that we want to register the results of the benchmark in the tsv (tab separated values) file `/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/hibridon.tsv`. Please note that this result database backend is not really appropriate for `clusterbench-submit`, as it suffers from racing conditions (`sqlserver-viassh-database` would be a better alternative, but it requires a more complicate setup). | ||||
| This means that we want to register the results of the benchmark in the tsv (tab-separated values) file `/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00/hibridon.tsv`. Please note that this results database backend is not really appropriate for `clusterbench-submit`, as it suffers from race conditions (`sqlserver-viassh-database` would be a better alternative, but it requires a more complicated setup). | ||||
| 
 | ||||
| When the jobs successfully complete, they put their results of the benchmark in `$results_dir` (eg `/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00`) | ||||
| When the jobs complete successfully, they put the results of the benchmark in `$results_dir` (e.g. `/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11:39:42+01:00`) | ||||
| 
 | ||||
| ```sh | ||||
| (iprbench.venv) graffy@alambix50:/opt/ipr/cluster/work.local/graffy/bug3958/iprbench.git$ cat $results_dir/hibridon.tsv  | ||||
| measurement_time	ipr_bench_version	host_id	host_fqdn	user	num_cpus	cpu_model	launcher	num_cores	hibridon_version	fortran_compilerblas_library	test_id	cmake_path	duration_avg	duration_med	duration_stddev	duration_min	duration_max	num_threads_per_run | ||||
| 2024-11-26 18:18:18.391137	0.0.13	<unknown>	alambix50.ipr.univ-rennes.fr	graffy	2	intel_xeon_x5650	graffy.manual	12	a3bed1c3ccfbca572003020d3e3d3b1ff3934fad	ifort:2021.13.1	intelmkl:2024.2.1	arch4_quick	cmake	3.7509884166666665	3.68795	0.1596150383672931	3.533953	4.036977	1 | ||||
| (iprbench.venv) graffy@alambix50:/opt/ipr/cluster/work.local/graffy/bug3958/iprbench.git$ cat /opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-27T11\:39\:42+01\:00/hibridon.tsv  | ||||
| measurement_time	ipr_bench_version	host_id	host_fqdn	user	num_cpus	cpu_model	launcher	num_cores	hibridon_version	fortran_compiler	blas_library	test_id	cmake_path	duration_avg	duration_med	duration_stddev	duration_min	duration_max	num_threads_per_run | ||||
| 2024-11-27 11:42:49.511113	0.0.14	<unknown>	alambix50.ipr.univ-rennes.fr	graffy	2	intel_xeon_x5650	graffy.manual.alambix.job18886	12	a3bed1c3ccfbca572003020d3e3d3b1ff3934fad	ifort:2021.13.1	intelmkl:2024.2.1	arch4_quick	cmake	3.8646755	3.8377410000000003	0.2467767843388766	3.569571	4.220125	1 | ||||
| ``` | ||||
| 
 | ||||
| ## graph the results of benchmarks | ||||
|  |  | |||
|  | @ -305,6 +305,9 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark | |||
|     logging.debug("resultsdb_params = %s", resultsdb_params) | ||||
|     logging.debug("resultsdb_params = %s", json.dumps(resultsdb_params)) | ||||
| 
 | ||||
|     # append the job id to the 'launcher' parameter | ||||
|     benchmark_config['launcher'] = benchmark_config['launcher'] + '.' + cluster.cluster_id + ".job'${JOB_ID}'"  # escape bash variables such as JOB_ID with surrounding quotes so that they are interpreted by the job script | ||||
| 
 | ||||
|     # create the job file (which embeds starbench.py) | ||||
|     tags_dict = { | ||||
|         # '<include:starbench.py>': scripts_dir / 'starbench.py', | ||||
|  | @ -392,6 +395,8 @@ def main(): | |||
|     # TODO: put the declaration of common params in a common function so that there is only one set of common parameters | ||||
|     common_params: List[BenchParam] = [] | ||||
|     common_params.append(BenchParam('launcher', BenchParam.Type.PARAM_TYPE_STRING, description='what triggered the benchmark (eg "alambix.job.12345", or "manual")')) | ||||
|     # if 'launcher' in benchmark_config.keys(): | ||||
|     #     raise ValueError(f'the value of "launcher" parameter ({benchmark_config["launcher"]}) should not be set by the user as it will be overwritten by clusterbench-submit') | ||||
| 
 | ||||
|     benchmark = BenchmarkFactory().create_benchmark(benchmark_id, common_params) | ||||
| 
 | ||||
|  |  | |||
|  | @ -1 +1 @@ | |||
| __version__ = '0.0.14' | ||||
| __version__ = '0.0.15' | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue