scripts/cluster/disk-watchdog.sh

#!/bin/bash
# The role of this script is to measure the size of the directories in $SHARED_DISK_ROOT and send an e-mail to $DEST_EMAIL if its fullness goes beyond $FULLNESS_THRESHOLD
# see https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 for details

# test run:
# graffy@work:~/bug3193$ ./disk-watchdog.sh /mnt/work/graffy/workspaces/meniscus $HOME/var/run/ipr/cluster/disk-watchdog /opt/ipr/cluster/work.global 90 guillaume.raffy@univ-rennes.fr "manual trigger" 20

# production run:
# graffy@work:~/bug3193$ sudo ./disk-watchdog.sh /mnt/work /var/run/ipr/cluster/disk-watchdog /opt/ipr/cluster/work.global 90 ipr-cluster@listes.univ-rennes1.fr "cron" 20

# Positional command-line arguments, captured once under descriptive names.

# root of the shared disk to inspect (eg '/mnt/work')
SHARED_DISK_ROOT="${1}"
# directory under which the reports are stored (eg "$HOME/var/run/ipr/cluster/disk-watchdog")
REPORTS_ROOT_PATH="${2}"
# path of the shared disk as the users know it (eg '/opt/ipr/cluster/work.global')
DISK_PUBLIC_PATH="${3}"
# disk fullness percentage above which an e-mail is sent (eg '90' for 90%)
FULLNESS_THRESHOLD="${4}"
# recipient of the warning e-mail (eg ipr-cluster@listes.univ-rennes1.fr)
DEST_EMAIL="${5}"
# free text describing what started this run (eg "daily cron")
TRIGGER_REASON="${6}"
# number of biggest directories reported (eg 20)
TOP_SIZE="${7}"

# Status codes used by the functions of this script.
RETURNCODE_SUCCESS=0
RETURNCODE_ERROR=1


# Sends a message to the system log through logger(1), tagged 'disk-watchdog'.
# Arguments:
#   $1 - the text to log
function log()
{
	# 'local' so that logging never overwrites a 'message' variable that
	# belongs to the caller or to the global scope
	local message="$1"
	logger -t 'disk-watchdog' "$message"
}

# Prints a timestamped error line ("<date> : ERROR : <text>") on stderr.
# Arguments:
#   $1 - the error text
function error()
{
	# 'local' so that reporting an error never overwrites a 'message' variable
	# that belongs to the caller or to the global scope
	local message="$1"
	printf '%s\n' "$(date) : ERROR : $message" 1>&2
}

# Prints the values of one named column of a whitespace-separated table whose
# first line holds the column names (eg the output of df).
# Arguments:
#   $1 - path of the table file (eg /var/run/ipr/cluster/disk-watchdog/meniscus-2022-05-07-16-43-38/total.txt)
#   $2 - name of the wanted column, as it appears in the first line (eg Use%)
# Outputs:
#   one line per data row on stdout, holding that row's value for the column
# Returns:
#   0 on success; non-zero (with a diagnostic on stderr and nothing on stdout)
#   if the column name is not present in the header line
function sheet_get_column()
{
	local sheet_file_path="$1"
	local column_name="$2"
	awk -v col="${column_name}" '
		NR == 1 {
			# locate the index of the wanted column in the header line
			for (i = 1; i <= NF; i++) {
				if ($i == col) {
					c = i
					break
				}
			}
			# without this guard an unknown column would leave c unset and
			# "print $c" would emit whole rows instead of a single field
			if (c == 0) {
				print "sheet_get_column: column \"" col "\" not found" > "/dev/stderr"
				exit 1
			}
			next
		}
		{
			print $c
		}
	' "$sheet_file_path"
}

# Builds the body of the warning e-mail in one language.
#
# The text written to stdout is NOT final HTML: it is a fragment meant to be
# used as part of a printf format string. Line breaks are therefore the
# two-character sequence backslash-n and literal percent signs are doubled.
# The arguments are embedded verbatim, so they must already be safe to use
# inside a printf format.
#
# Arguments:
#   $1 - language of the message: 'french' or 'english'
#   $2 - the rows (<tr>...</tr>) of the disk usage table
#   $3 - path of the shared disk as the users know it
#   $4 - size of the disk, in Gb
#   $5 - fullness percentage above which the e-mail is sent
#   $6 - the administrators, as a sequence of <li> elements
#   $7 - (optional) current fullness percentage of the disk; when omitted, the
#        value of the caller's 'disk_fullness' variable is used
# Outputs:
#   the message on stdout
# Returns:
#   0 on success, RETURNCODE_ERROR (1 if that variable is not set) when the
#   language is not supported
function create_html_contents()
{
	local language_id="$1"  # "french" or "english"
	local disk_usage_table_as_html="$2"
	local disk_public_path="$3"
	local disk_size_in_gb="$4"
	local fullness_threshold="$5"
	local admin_list_as_html="$6"
	# the right-hand side is expanded before the local variable is created, so
	# the fallback picks up the variable of the same name visible in the caller
	local disk_fullness="${7:-${disk_fullness:-}}"

	local html_content=''

	case "${language_id}" in
		'english')
			html_content+="<body>\n"
			html_content+="<p>This is an automatic message, please don't reply.</p>\n"
			html_content+="<p>The shared disk <code>${disk_public_path}</code> (${disk_size_in_gb} Gb) is ${disk_fullness}%% full. As a full disk will cause the jobs to unnecessarily fail, it's urgent to do some cleanup especially for the top biggest users:</p>\n"
			html_content+="<table>\n"
			html_content+="<tr><th>rank</th><th>used size (in Mb)</th><th>owner</th><th>folder</th></tr>\n"
			html_content+="${disk_usage_table_as_html}"
			html_content+="</table>\n"

			html_content+="<p>This message is automatically sent daily to all cluster users until the disk usage goes below ${fullness_threshold}%%. So if you're wondering \"All this is good, but can I do something to avoid this annoying message?\", then you'll probably be happy to hear that the answer is yes:</p>\n"
			html_content+="<ul>\n"
			html_content+="<li>if your name is not on the list above, then you can go and harass a random user that is in the list;\n</li>\n"
			html_content+="<li>if your name is high on the list above, then you'd better do a cleanup of your files before the other cluster users find you! 😉\n</li>\n"
			html_content+="</ul>\n"

			html_content+="<p>Reminder (cf <a href=\"http://intranet.ipr.univ-rennes1.fr/simpaweb/cluster/PhysixUserGuide\">IPR cluster user guide</a>): the disk <code>${disk_public_path}</code> is not intended to be used for archiving (it's not even backed up!); it's a workspace whose purpose is to temporarily store the data required to make the jobs work. Except in some special use cases (eg reuse the output data as input for future jobs), the users are expected to transfer or delete the output files on <code>${disk_public_path}</code> once their job has ended.</p>\n"

			html_content+="<p>The cluster user committee decided not to put quotas per user on <code>${disk_public_path}</code> because it would result in significantly less disk space per user and less flexibility. However, using this shared disk without quotas can only work if all users act in a responsible manner, cleaning up their own data after usage. Please make this work, it's in the interest of all users, including you. If you think the size of <code>${disk_public_path}</code> is too small for your needs, please contact the cluster administrators.</p>\n"

			html_content+="<p>Thank you for your understanding</p>\n"
			html_content+="<p>The IPR cluster administrators:</p>\n"
			html_content+="<ul>\n"
			html_content+="${admin_list_as_html}"
			html_content+="</ul>\n"
			html_content+="</body>\n"
			;;

		'french')
			html_content+="<body>\n"
			html_content+="<p>Ceci est un message automatique, ne pas répondre svp.</p>\n"
			html_content+="<p>Le disque partagé <code>${disk_public_path}</code> (${disk_size_in_gb} Gb) est plein ${disk_fullness}%%. Etant donné qu'un disque plein fait échouer les jobs et que cela peut être évité, il est urgent de procéder à un nettoyage, surtout pour les plus gros consommateurs:</p>\n"
			html_content+="<table>\n"
			html_content+="<tr><th>rang</th><th>espace occupé (en Mb)</th><th>propriétaire</th><th>dossier</th></tr>\n"
			html_content+="${disk_usage_table_as_html}"
			html_content+="</table>\n"

			html_content+="<p>Ce message est quotidiennement et automatiquement envoyé à tous les utilisateurs du cluster, jusqu'à ce que le taux de remplissage du disque retombe en dessous de ${fullness_threshold}%%. Si vous vous demandez \"c'est très bien tout ça, mais qu'est-ce que je peux y faire ?\", voici quelques éléments de réponse:</p>\n"
			html_content+="<ul>\n"
			html_content+="<li>si votre nom ne figure pas dans la liste ci-dessus, vous avez toujours la possibilité d'aller harceler un utilisateur dont le nom y figure;\n</li>\n"
			html_content+="<li>si votre nom figure dans la liste, il vous est chaudement recommandé de procéder à un nettoyage de vos fichiers avant que les autres utilisateurs ne vous trouvent! 😉\n</li>\n"
			html_content+="</ul>\n"

			html_content+="<p>Pour rappel (cf <a href=\"http://intranet.ipr.univ-rennes1.fr/simpaweb/cluster/PhysixUserGuide\">guide d'utilisation du cluster IPR</a>), le disque <code>${disk_public_path}</code> n'a pas vocation à servir pour de l'archivage (il n'est d'ailleurs pas sauvegardé!); c'est un espace de travail dont le rôle est de stocker temporairement les données nécessaires à vos jobs. Sauf exception (par exemple, réutilisation des données de sortie pour des jobs futurs), les utilisateurs sont censés rapatrier ou effacer leurs données une fois leurs jobs terminés.</p>\n"

			html_content+="<p>Le comité des utilisateurs du cluster n'a pas opté pour la mise en place de quotas sur <code>${disk_public_path}</code>, jugeant que non seulement cela affecterait la flexibilité, mais que les utilisateurs auraient au final moins d'espace disponible en pratique. Cependant, l'utilisation de ce disque partagé sans quotas ne peut fonctionner que si tout le monde joue le jeu et agit de façon responsable, en nettoyant ses données après utilisation. Svp faites en sorte que l'on puisse continuer de fonctionner ainsi, dans l'intérêt de tous. Si vous estimez que la taille du disque <code>${disk_public_path}</code> est trop petite pour vos besoins, n'hésitez pas à contacter les administrateurs du cluster.</p>\n"

			html_content+="<p>Merci de votre compréhension</p>\n"
			html_content+="<p>Les administrateurs du cluster IPR:</p>\n"
			html_content+="<ul>\n"
			html_content+="${admin_list_as_html}"
			html_content+="</ul>\n"
			html_content+="</body>\n"
			;;

		*)
			error "unexpected value for language_id : ${language_id}"
			# 'return' needs the value of the variable, not its name
			return "${RETURNCODE_ERROR:-1}"
			;;
	esac
	# printed verbatim: the backslash-n sequences must reach the caller as is
	printf '%s\n' "$html_content"
}

# Filter (stdin to stdout) that makes arbitrary text safe to embed in the
# e-mail body: HTML special characters are replaced by entities, then
# backslashes and percent signs are doubled because the body is later used
# as a printf format string.
function _disk_watchdog_escape()
{
	sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' -e 's/\\/\\\\/g' -e 's/%/%%/g'
}

# Measures the usage of a shared disk, stores a report on disk and, when the
# disk is fuller than the given threshold, e-mails a bilingual warning that
# lists the biggest top-level directories and their owners.
#
# Arguments:
#   $1 - root of the shared disk to inspect (eg /mnt/work)
#   $2 - directory under which the reports are stored
#        (eg "$HOME/var/run/ipr/cluster/disk-watchdog")
#   $3 - path of the shared disk as the users know it
#        (eg /opt/ipr/cluster/work.global)
#   $4 - fullness percentage from which the e-mail is sent (eg '90' for 90%)
#   $5 - recipient of the e-mail (eg ipr-cluster@listes.univ-rennes1.fr)
#   $6 - free text describing what started this check (eg "cron.daily")
#   $7 - number of biggest directories reported (eg 20)
# Outputs:
#   - <$2>/<basename of $1>-<timestamp>/total.txt   : output of 'df -m'
#   - <$2>/<basename of $1>-<timestamp>/subdirs.txt : one
#     "<size in Mb> <owner> <path>" line per entry found directly under $1
#   - the raw 'du' listing on stdout
#   - an e-mail through /usr/sbin/sendmail when the threshold is reached
# Returns:
#   0 on success, RETURNCODE_ERROR (1 if that variable is not set) on invalid
#   arguments, unreadable disk figures, or failure to build or send the e-mail
function check_disk_usage()
{
	local shared_disk_root="$1"  # eg /mnt/work
	local reports_root_path="$2"  # "$HOME/var/run/ipr/cluster/disk-watchdog"
	local disk_public_path="$3"  # eg /opt/ipr/cluster/work.global
	local fullness_threshold="$4" # eg '90' for 90%
	local dest_email="$5"  # eg ipr-cluster@listes.univ-rennes1.fr
	local trigger_reason="$6" # eg "cron.daily"
	local top_size="$7" # number of biggest directories reported (eg 20)
	local trigger_date
	trigger_date="$(date)"

	# both values are used in numeric contexts below ('[ -ge ]', 'head -n')
	if ! [[ "${fullness_threshold}" =~ ^[0-9]+$ && "${top_size}" =~ ^[0-9]+$ ]]
	then
		error "the fullness threshold (${fullness_threshold}) and the top size (${top_size}) must be non-negative integers"
		return "${RETURNCODE_ERROR:-1}"
	fi

	local disk_id
	disk_id=$(basename -- "${shared_disk_root}")
	log "check of ${shared_disk_root} triggered by ${trigger_reason} with threshold ${fullness_threshold} %"

	# 'mkdir -p' also creates ${reports_root_path} itself when it is missing
	local this_report_dir
	this_report_dir="${reports_root_path}/${disk_id}-$(date +'%Y-%m-%d-%H-%M-%S')"
	mkdir -p -- "${this_report_dir}"

	local disk_global_usage_report_file_path="${this_report_dir}/total.txt"
	df -m -- "${shared_disk_root}" > "${disk_global_usage_report_file_path}"

	local disk_usage_report_file_path="${this_report_dir}/subdirs.txt"
	local tmp_disk_usage_report_file_path="${this_report_dir}/subdirs.tmp"
	log "listing usage of $shared_disk_root into $disk_usage_report_file_path"
	# the listing is also echoed on stdout; as the last stage of the pipeline,
	# tee decides its status, so a partial du failure (eg an unreadable
	# sub-directory) does not abort the script under errexit
	du -sm -- "${shared_disk_root}"/* | tee "${tmp_disk_usage_report_file_path}"

	# du separates the size from the path with a tab; reading the fields in the
	# shell (rather than building a command line for awk's system()) keeps
	# paths containing spaces or shell metacharacters intact and harmless
	local used_mb dir_path dir_owner
	while IFS=$'\t' read -r used_mb dir_path
	do
		dir_owner=$(stat --printf='%U' -- "${dir_path}") || dir_owner='unknown'
		printf '%s %s %s\n' "${used_mb}" "${dir_owner}" "${dir_path}"
	done < "${tmp_disk_usage_report_file_path}" > "${disk_usage_report_file_path}"
	rm -- "${tmp_disk_usage_report_file_path}"

	# NOTE: create_html_contents may read 'disk_fullness' from this scope, so
	# this variable must keep its name
	local disk_fullness disk_size_in_mb
	disk_fullness=$(sheet_get_column "${disk_global_usage_report_file_path}" 'Use%' | sed 's/%//')
	disk_size_in_mb=$(sheet_get_column "${disk_global_usage_report_file_path}" '1M-blocks')
	log "disk fullness = $disk_fullness %"

	if ! [[ "${disk_fullness}" =~ ^[0-9]+$ && "${disk_size_in_mb}" =~ ^[0-9]+$ ]]
	then
		error "unable to read the fullness and the size of ${shared_disk_root} from ${disk_global_usage_report_file_path}"
		return "${RETURNCODE_ERROR:-1}"
	fi

	if [ "${disk_fullness}" -ge "${fullness_threshold}" ]
	then

		local to="$dest_email"
		local from="info-ipr@univ-rennes.fr"
		local subject="warning: The shared disk ${disk_public_path} is full at ${disk_fullness}% !"
		local html_content=''

		# one table row per directory, biggest first; as the whole body is a
		# printf format, rows end with a literal backslash-n
		local disk_usage_table_as_html=''
		local rank=0
		while read -r used_mb dir_owner dir_path
		do
			rank=$((rank + 1))
			disk_usage_table_as_html+="<tr><td>${rank}</td><td align=\"right\">${used_mb}</td><td align=\"center\">${dir_owner}</td><td>${dir_path}</td></tr>\\n"
		done < <(sort -r -n -- "${disk_usage_report_file_path}" | head -n "${top_size}" | _disk_watchdog_escape)

		local disk_size_in_gb="$((disk_size_in_mb / 1024))"
		local admin_list_as_html=''
		admin_list_as_html="${admin_list_as_html}<li><a href=\"mailto:guillaume.raffy@univ-rennes.fr\">guillaume.raffy@univ-rennes.fr</a>\n</li>\n"
		admin_list_as_html="${admin_list_as_html}<li><a href=\"mailto:jeremy.gardais@univ-rennes.fr\">jeremy.gardais@univ-rennes.fr</a>\n</li>\n"

		local escaped_public_path
		escaped_public_path=$(printf '%s' "${disk_public_path}" | _disk_watchdog_escape)

		local localized_message=''
		local language_id
		for language_id in 'french' 'english'
		do
			html_content="${html_content}<h1>${language_id} version</h1>"
			# tested directly: under errexit a failing plain assignment would
			# abort the script before '$?' could be inspected
			if ! localized_message=$(create_html_contents "${language_id}" "${disk_usage_table_as_html}" "${escaped_public_path}" "${disk_size_in_gb}" "${fullness_threshold}" "${admin_list_as_html}")
			then
				return "${RETURNCODE_ERROR:-1}"
			fi
			html_content="${html_content}${localized_message}"
		done

		local footer
		footer="disk-watchdog.ipr.univ-rennes1.fr v1.0 - triggered from $(hostname --fqdn) by : $(whoami) (${trigger_reason}) on ${trigger_date}"
		footer=$(printf '%s' "${footer}" | _disk_watchdog_escape)
		html_content="${html_content}<small>${footer}</small>"

		# The header values are passed as printf arguments so that they are
		# never interpreted as format directives. The empty line after the
		# headers separates them from the body.
		if ! {
			printf 'To: %s\nFrom: %s\nSubject: %s\nMIME-Version: 1.0\nContent-Type: text/html; charset="UTF-8"\n\n' "${to}" "${from}" "${subject}"
			# html_content is deliberately used as a format: it holds
			# backslash-n sequences and doubled percent signs
			printf "<html>${html_content}</html>\n"
		} | /usr/sbin/sendmail "${to}"
		then
			error "failed to send the e-mail to ${to}"
			return "${RETURNCODE_ERROR:-1}"
		fi
		log "e-mail sent to ${to}"
	fi
}

# From here on, any unhandled command failure terminates the script.
set -o errexit

# The script takes exactly seven positional arguments (see the test and
# production runs at the top of the file); anything else is rejected.
if [ $# != 7 ]
then
	error "wrong number of arguments"
	exit "${RETURNCODE_ERROR}"
fi

check_disk_usage "${SHARED_DISK_ROOT}" "${REPORTS_ROOT_PATH}" "${DISK_PUBLIC_PATH}" "${FULLNESS_THRESHOLD}" "${DEST_EMAIL}" "${TRIGGER_REASON}" "${TOP_SIZE}"
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00			`#!/bin/bash`
			`# The role of this script is to measure the size of the directories in $SHARED_DISK_ROOT and send an e-mail to $DEST_EMAIL if its fullness goes beyond $FULLNESS_THRESHOLD`
			`# see https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 for details`

			`# test run:`
Update to @univ-rennes.fr domain 2023-08-16 05:58:32 +02:00			`# graffy@work:~/bug3193$ ./disk-watchdog.sh /mnt/work/graffy/workspaces/meniscus $HOME/var/run/ipr/cluster/disk-watchdog /opt/ipr/cluster/work.global 90 guillaume.raffy@univ-rennes.fr "manual trigger" 20`
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00
			`# production run:`
improved disk-watchdog : the e-mail now includes the user triggering the script 2022-05-09 18:14:14 +02:00			`# graffy@work:~/bug3193$ sudo ./disk-watchdog.sh /mnt/work /var/run/ipr/cluster/disk-watchdog /opt/ipr/cluster/work.global 90 ipr-cluster@listes.univ-rennes1.fr "cron" 20`
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00
			`SHARED_DISK_ROOT="$1" # eg '/mnt/work'`
			`REPORTS_ROOT_PATH="$2" # eg "$HOME/var/run/ipr/cluster/disk-watchdog"`
			`DISK_PUBLIC_PATH="$3" # 'eg /opt/ipr/cluster/work.global'`
			`FULLNESS_THRESHOLD="$4" # disk fullness percentage above which an e-mail is sent (eg. '90' for 90%)`
			`DEST_EMAIL="$5" # eg ipr-cluster@listes.univ-rennes1.fr`
improved disk-watchdog : the e-mail now includes the user triggering the script 2022-05-09 18:14:14 +02:00			`TRIGGER_REASON="$6" # eg "daily cron"`
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00			`TOP_SIZE="$7" # # number of biggest directories reported (eg 20)"`

			`RETURNCODE_SUCCESS=0`
			`RETURNCODE_ERROR=1`


			`function log()`
			`{`
			`message="$1"`
			`logger -t 'disk-watchdog' "$message"`
			`}`

			`function error()`
			`{`
			`message="$1"`
			`echo 1>&2 "$(date) : ERROR : $message"`
			`}`

			`function sheet_get_column()`
			`{`
			`local sheet_file_path="$1" # eg /var/run/ipr/cluster/disk-watchdog/meniscus-2022-05-07-16-43-38/total.txt`
			`local column_name="$2" # eg Use%`
			`cat "$sheet_file_path" \| awk -v col="${column_name}" '\`
			`NR==1 \`
			`{\`
			`for (i=1; i<=NF; i++)\`
			`if ($i == col)\`
			`{\`
			`c=i; break\`
			`}\`
			`}\`
			`NR != 1\`
			`{\`
			`print $c\`
			`}'`
			`}`

			`function create_html_contents()`
			`{`
			`local language_id="$1" # "french" or "english"`
			`local disk_usage_table_as_html="$2"`
			`local disk_public_path="$3"`
			`local disk_size_in_gb="$4"`
			`local fullness_threshold="$5"`
			`local admin_list_as_html="$6"`
Use a real sender for email See bugzilla 3582 https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3582 2023-06-23 11:14:31 +02:00
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00			`local html_content=''`

			`case ${language_id} in`
			`'english')`
			`html_content="${html_content}<body>\n"`
			`html_content="${html_content}<p>This is an automatic message, please don't reply.</p>\n"`
			`html_content="${html_content}<p>The shared disk <code>${disk_public_path}</code> (${disk_size_in_gb} Gb) is $disk_fullness%% full. As a full disk will cause the jobs to unnecessarily fail, it's urgent to do some cleanup especially for the top biggest users:</p>\n"`
			`html_content="${html_content}<table>\n"`
			`html_content="${html_content}<th>rank</th><th>used size (in Mb)</th><th>owner</th><th>folder</th>\n"`
			`html_content="${html_content}${disk_usage_table_as_html}"`
			`html_content="${html_content}</table>\n"`

			`html_content="${html_content}<p>This message is automatically sent daily to all cluster users until the disk usage goes below ${fullness_threshold}%%. So if you're wondering \"All this is good, but can I do something to avoid this annoying message?\", then you'll probably be happy to hear that the answer is yes:</p>\n"`
			`html_content="${html_content}<ul>\n"`
			`html_content="${html_content}<li>if your name is not on the list above, then you can go and harrass a random user that is in the list;\n</li>\n"`
			`html_content="${html_content}<li>if your name is high on the list above, then you'd better do a cleanup of your files before the other cluster users find you! 😉\n</li>\n"`
			`html_content="${html_content}</ul>\n"`

			html_content="${html_content}Reminder (cf <a href=http://intranet.ipr.univ-rennes1.fr/simpaweb/cluster/PhysixUserGuide>IPR cluster user guide</a>): the disk <code>${disk_public_path}</code> is not intended to be used for archiving (it's not even backed up!); it's a workspace whose purpose is to temporarily store the data required to make the jobs work. Except in some special use cases (eg reuse the output data as input for future jobs), the users are expected to transfer or delete the output files on <code>${disk_public_path}</code> once their job has ended.\n"

			html_content="${html_content}<p>The cluster user committee decided not to put quotas per user on <code>${disk_public_path}</code> because it would result in significantly less disk space per user and less flexibility. However, using this shared disk without quotas can only work if all users act in a responsible manner, cleaning up their own data after usage. Please make this work, it's in the interest of all users, including you. If you think the size of <code>${disk_public_path}</code> is too small for your needs, please contact the cluster administrators.</p>\n"

			`html_content="${html_content}<p>Thank you for your understanding</p>\n"`
			`html_content="${html_content}<p>The IPR cluster administrators:</p>\n"`
			`html_content="${html_content}<ul>\n"`
			`html_content="${html_content}${admin_list_as_html}"`
			`html_content="${html_content}</ul>\n"`
			`html_content="${html_content}</body>\n"`
			`;;`

			`'french')`
			`html_content="${html_content}<body>\n"`
			`html_content="${html_content}<p>Ceci est un message automatique, ne pas répondre svp.</p>\n"`
			`html_content="${html_content}<p>Le disque partagé <code>${disk_public_path}</code> (${disk_size_in_gb} Gb) est plein $disk_fullness%%. Etant donné qu'un disque plein fait échouer les jobs et que cela peut être évité, il est urgent de procéder à un nettoyage, surtout pour les plus gros consommateurs:</p>\n"`
			`html_content="${html_content}<table>\n"`
			`html_content="${html_content}<th>rang</th><th>espace occupé (en Mb)</th><th>propriétaire</th><th>dossier</th>\n"`
			`html_content="${html_content}${disk_usage_table_as_html}"`
			`html_content="${html_content}</table>\n"`

			`html_content="${html_content}<p>Ce message est quotidiennement et automatiquement envoyé à tous les utilisateurs du cluster, jusqu'à ce que le taux de remplissage du disque retombe en dessous de ${fullness_threshold}%%. Si vous vous demandez \"c'est très bien tout ça, mais qu'est-ce que je peux y faire ?\", voici quelques éléments de réponse:</p>\n"`
			`html_content="${html_content}<ul>\n"`
			`html_content="${html_content}<li>si votre nom ne figure pas dans la liste ci-dessus, vous avez toujours la possibilité d'aller harceler un utilisateur dont le nom y figure;\n</li>\n"`
			`html_content="${html_content}<li>si votre nom figure dans la liste, il vous est chaudement recommandé de procéder à un nettoyage de vos fichiers avant que les autres utilisateurs ne vous trouvent! 😉\n</li>\n"`
			`html_content="${html_content}</ul>\n"`

			html_content="${html_content}Pour rappel (cf <a href=http://intranet.ipr.univ-rennes1.fr/simpaweb/cluster/PhysixUserGuide>guide d'utilisation du cluster IPR</a>), le disque <code>${disk_public_path}</code> n'a pas vocation à servir pour de l'archivage (il n'est d'ailleurs pas sauvegardé!); c'est un espace de travail don't le rôle est de stocker temporairement les données nécéssaires à vos jobs. Sauf exception (par exemple, réutilisation des données de sortie pour des jobs futurs), les utilisteurs sont censés rapatrier ou effacer leurs données une fois leurs jobs terminés.\n"

fixed typo 2022-05-09 18:51:52 +02:00			html_content="${html_content}<p>Le comité des utilisateurs du cluster n'a pas opté pour la mise en place de quotas sur <code>${disk_public_path}</code>, jugeant que non seulement cela affecterait la flexibilité, mais que les utilisateurs auraient au final moins d'espace disponible en pratique. Cependant, l'utilisation de ce disque partagé sans quotas ne peut fonctionner que si tout le monde joue le jeu et agit de façon responsable, en nettoyant ses données après utilisation. Svp faites en sorte que l'on puisse continuer de fonctionner ainsi, dans l'intérêt de tous. Si vous estimez que la taille du disque <code>${disk_public_path}</code> est trop petite pour vos besoins, n'hésitez pas à contacter les administrateurs du cluster.</p>\n"
Use a real sender for email See bugzilla 3582 https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3582 2023-06-23 11:14:31 +02:00
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00			`html_content="${html_content}<p>Merci de votre compréhension</p>\n"`
			`html_content="${html_content}<p>Les administrateurs du cluster IPR:</p>\n"`
			`html_content="${html_content}<ul>\n"`
			`html_content="${html_content}${admin_list_as_html}"`
			`html_content="${html_content}</ul>\n"`
			`html_content="${html_content}</body>\n"`
			`;;`
			`*)`
			`error "unexpected value for language_id : ${language_id}"`
			`return RETURNCODE_ERROR`
			`esac`
			`echo "$html_content"`
			`}`

			`function check_disk_usage()`
			`{`
			`local shared_disk_root="$1" # the eg /mnt/work`
			`local reports_root_path="$2" # "$HOME/var/run/ipr/cluster/disk-watchdog"`
			`local disk_public_path="$3" # eg /opt/ipr/cluster/work.global`
			`local fullness_threshold="$4" # eg '90' for 90%`
			`local dest_email="$5" # eg ipr-cluster@listes.univ-rennes1.fr`
improved disk-watchdog : the e-mail now includes the user triggering the script 2022-05-09 18:14:14 +02:00			`local trigger_reason="$6" # eg "cron.daily"`
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00			`local top_size="$7" # number of biggest directories reported (eg 20)`
			`local trigger_date="$(date)"`

			`disk_id=$(basename "${shared_disk_root}")`
improved disk-watchdog : the e-mail now includes the user triggering the script 2022-05-09 18:14:14 +02:00			`log "check of ${shared_disk_root} triggered by ${trigger_reason} with threshold ${fullness_threshold} %"`
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00
			`mkdir -p "{$reports_root_path}"`
			`this_report_dir="${reports_root_path}/${disk_id}-$(date +'%Y-%m-%d-%H-%M-%S')"`
			`mkdir -p "${this_report_dir}"`

			`disk_global_usage_report_file_path="${this_report_dir}/total.txt"`
			`df -m ${shared_disk_root} > "$disk_global_usage_report_file_path"`

			`disk_usage_report_file_path="${this_report_dir}/subdirs.txt"`
			`tmp_disk_usage_report_file_path="${this_report_dir}/subdirs.tmp"`
			`log "listing usage of $shared_disk_root into $disk_usage_report_file_path"`
			`du -sm ${shared_disk_root}/* \| tee "${tmp_disk_usage_report_file_path}"`

reverted last change because it caused more problems than good 2022-05-09 18:50:35 +02:00			`cat ${tmp_disk_usage_report_file_path} \| awk '{ printf("%s ", $1); system("stat --printf=''%U'' " $2); printf(" %s\n", $2); }' > "${disk_usage_report_file_path}"`
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00			`rm "${tmp_disk_usage_report_file_path}"`
Use a real sender for email See bugzilla 3582 https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3582 2023-06-23 11:14:31 +02:00			`# sed "s\|${shared_disk_root}\|${disk_public_path}\|g" \|`
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00
			`local disk_fullness=$(sheet_get_column $disk_global_usage_report_file_path 'Use%' \| sed 's/%//')`
			`local disk_size_in_mb=$(sheet_get_column $disk_global_usage_report_file_path '1M-blocks' \| sed 's/%//')`
			`log "disk fullness = $disk_fullness %"`

			`if [ "$disk_fullness" -ge "${fullness_threshold}" ]`
			`then`

			`local to="$dest_email"`
Use a real sender for email See bugzilla 3582 https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3582 2023-06-23 11:14:31 +02:00			`local from="info-ipr@univ-rennes.fr"`
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00			`local subject="warning: The shared disk ${disk_public_path} is full at $disk_fullness%% !"`
			`local html_content=''`
			`local disk_usage_table_as_html="$(cat "${disk_usage_report_file_path}" \| sort -r -n \| head -${top_size} \| awk '{printf("<tr><td>%s</td><td align=\"right\">%d</td><td align=\"center\">%s</td><td>%s</td></tr>\\n", NR, $1, $2, $3)}')"`
			`local disk_size_in_gb="$((disk_size_in_mb / 1024))"`
			`local admin_list_as_html=''`
Update to @univ-rennes.fr domain 2023-08-16 05:58:32 +02:00			`admin_list_as_html="${admin_list_as_html}<li><a href=\"mailto:guillaume.raffy@univ-rennes.fr\">guillaume.raffy@univ-rennes.fr</a>\n</li>\n"`
			`admin_list_as_html="${admin_list_as_html}<li><a href=\"mailto:jeremy.gardais@univ-rennes.fr\">jeremy.gardais@univ-rennes.fr</a>\n</li>\n"`
Use a real sender for email See bugzilla 3582 https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3582 2023-06-23 11:14:31 +02:00
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00			`local localized_message=''`
			`for language_id in 'french' 'english'`
			`do`
			`html_content="${html_content}<h1>${language_id} version</h1>"`
			`localized_message=$(create_html_contents "${language_id}" "${disk_usage_table_as_html}" "${disk_public_path}" "${disk_size_in_gb}" "${fullness_threshold}" "${admin_list_as_html}")`
			`if [ $? != "${RETURNCODE_SUCCESS}" ]`
			`then`
			`return "${RETURNCODE_ERROR}"`
			`fi`
			`html_content="${html_content}${localized_message}"`
			`done`
improved disk-watchdog : the e-mail now includes the user triggering the script 2022-05-09 18:14:14 +02:00			`html_content="${html_content}<small>disk-watchdog.ipr.univ-rennes1.fr v1.0 - triggered from $(hostname --fqdn) by : $(whoami) (${trigger_reason}) on ${trigger_date}</small>"`
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00
			`local sendmail_stdin="To: ${to}\nFrom: ${from}\nSubject: ${subject}\nContent-Type: text/html; charset=\"UTF-8\"\n<html>${html_content}</html>\n"`
			`printf "$sendmail_stdin" \| /usr/sbin/sendmail "${to}"`
			`log "e-mail sent to ${to}"`
			`fi`
			`}`

			`set -o errexit`

			`if [ $# = 7 ]`
			`then`
improved disk-watchdog : the e-mail now includes the user triggering the script 2022-05-09 18:14:14 +02:00			`check_disk_usage "${SHARED_DISK_ROOT}" "${REPORTS_ROOT_PATH}" "${DISK_PUBLIC_PATH}" "${FULLNESS_THRESHOLD}" "${DEST_EMAIL}" "${TRIGGER_REASON}" "${TOP_SIZE}"`
added a script to send a report on /opt/ipr/cluster/work.global usage when it's full This is to address https://bugzilla.ipr.univ-rennes1.fr/show_bug.cgi?id=3193 but this script will need to be triggered by cron.daily on work.ipr.univ-rennes1.fr 2022-05-09 17:48:58 +02:00			`else`
			`error "wrong number of arguments"`
			`exit "${RETURNCODE_ERROR}"`
			`fi`