SHIFT

--- Sjoerd Hooft's InFormation Technology ---

User Tools

Site Tools


Sidebar

Recently Changed Pages:

View All Pages


View All Tags


LinkedIn




WIKI Disclaimer: As with most other things on the Internet, the content on this wiki is not supported. It was contributed by me and is published “as is”. It has worked for me, and might work for you.
Also note that any views or statements expressed anywhere on this site are strictly mine and not the opinions or views of my employer.


Pages with comments

View All Comments

dailycheckscript

Script: Bash: AIX: Daily Check Script

#!/bin/bash
########################################################################################################################
# Author : Sjoerd Hooft
# Date Initial Version: 27 Dec 2010
# Comments: sjoerd_@_warmetal_nl
#
# Description:
# This is a sample script to perform the daily checks on AIX servers.
#
# Recommendations:
# The script is designed for a 120 column terminal.
# The running user must be able to do a passwordless sudo to root.
#
# Changes:
# Please comment on your changes to the script (your name and email address, line number, description):
########################################################################################################################
 
# Script Variables
# "$0" is quoted so the script also works when started from a path that
# contains whitespace.
HOSTNAME_SHORT=$(hostname -s)
# 0 = interactive run (pause after each check); the "auto" mode sets it to 1
AUTOMATIC=0
BASEDIR=$(dirname "$0")
LOGFILE="$BASEDIR/dc.log"
WHATAMI=$(basename "$0")
DATE=$(date +%Y%m%d)
TOMAIL=sjoerd_@_warmetal_nl
# Terminal sequences used to emphasise headings. tput diagnostics are
# discarded so a run without a usable terminal stays quiet; the variables
# are then simply empty.
BOLD=$(tput bold 2>/dev/null)
BOLDOFF=$(tput sgr0 2>/dev/null)

# Directories
APPDIR="/var/log/APP"
WASDIR="/opt/WAS_Profiles/AppSrv/logs"
FILE3DIR="/var/data/FILE3"
FILE1DIR="/var/data/FILE1/log"
JMSDIR="/var/data/app/jms_errors"
TOMCATDIR="/var/log/app"

# Oracle Variables
# ORACLE_SID itself is chosen per host later on from the two SIDs below.
ORACLE_HOME="/opt/oracle/product/10.2"
ORACLE_BASE="/opt/oracle"
ORACLE_SID_DB1=db1
ORACLE_SID_DB2=db2
export ORACLE_HOME ORACLE_BASE
 
# Pause between checks so the operator can read the previous output.
# Globals: AUTOMATIC (read) - only the value "0" makes the function pause
# In every other case the function returns immediately without output.
scriptContinue () {
   # Not an interactive run: nothing to wait for
   [ "$AUTOMATIC" != "0" ] && return 0

   printf '%s\n' "Press ENTER to continue"
   read CONTINUE
   clear
}
 
# Print a heading followed by the output of errpt (the AIX error report)
# and a trailing blank line.
# Globals: BOLD, BOLDOFF (read)
checkErrors () {
   printf '%s\n' "$BOLD Listing the Error Logging Facility: $BOLDOFF"
   errpt
   printf '\n'
}
 
# Announce the action and run "errclear 0" through sudo.
clearErrors () {
   printf '%s\n' "Clearing Errors"
   sudo errclear 0
}
 
# Announce the action and page through the detailed error report
# ("errpt -a") with less.
viewErrors () {
   printf '%s\n' "Viewing Errors"
   errpt -a | less
}
 
# Remove every file in the directory that holds the JMS/MQ error files.
# Globals: JMSDIR (read)
# The operator gets a chance to abort (CTRL+C) before anything is deleted;
# the deletion itself runs through sudo.
# Returns: 1 without deleting anything when JMSDIR is empty or unset.
removeJms () {
   # Without this guard an empty JMSDIR would turn the glob below into "/*"
   # and the rm would run against the root directory with root privileges.
   if [ -z "$JMSDIR" ]; then
      echo "JMSDIR is not set, refusing to remove any files" >&2
      return 1
   fi

   echo "Are you sure you want to remove the JMS error files from $JMSDIR? "
   echo "If you hesitate, press CTRL+C to exit the script. "
   scriptContinue
   echo "Removing these files: "
   # Only the directory part is quoted so the shell still expands the glob
   echo "$JMSDIR"/*
   sudo rm "$JMSDIR"/*
   echo
   echo "Done"
   echo
}
 
# Show recent ERROR lines from the application logs in APPDIR.
# For each of the three application servers the current logfile and its
# three rotated predecessors are scanned (oldest first), known errors are
# filtered out and at most the last 10 remaining lines per file are shown.
# Globals: APPDIR, BOLD, BOLDOFF (read)
checkLog-abs () {
   local application logfile
   echo "$BOLD Checking abs-logs in $APPDIR $BOLDOFF "
   echo "Note: we check the last 4 logfiles and skip any known error, and limit the amount of lines to 10."
   for application in appserver1 appserver2 appserver3; do
      for logfile in app.log.4 app.log.3 app.log.2 app.log; do
         echo "Checking $BOLD $application-$logfile $BOLDOFF "
         # The files live in APPDIR, the directory announced in the heading
         # (the former ABSDIR was never defined anywhere in the script).
         # Each -e pattern is a known error that must not be reported.
         grep ERROR "$APPDIR/$application-$logfile" | \
            grep -v \
               -e 'LDAP: error code 32 - No Such Object' \
               -e 'doRefreshProposalsResponse didn.d send the email caught - ignoring' \
               -e 'Error getting active tan: No TAN available for user' \
               -e 'CORBA OBJECT_NOT_EXIST' | \
            tail -10
         echo
      done
      # scriptContinue already clears the screen in interactive mode, so no
      # additional clear is issued here.
      scriptContinue
   done
   echo
}
 
# Show recent error lines from the SystemOut.log of each WebSphere server.
# Lines containing "error" (any case) are selected, known errors are
# filtered out and at most the last 10 remaining lines per server are shown.
# Globals: WASDIR, BOLD, BOLDOFF (read)
checkLog-was () {
   local server
   echo "$BOLD Checking websphere logs in $WASDIR $BOLDOFF "
   for server in server1 server2 server3 server4; do
      echo "Checking $BOLD ${server}_Server/SystemOut.log $BOLDOFF "
      # The path is quoted so a WASDIR containing whitespace still works.
      # Each -e pattern is a known error that must not be reported.
      grep -i error "$WASDIR/${server}_Server/SystemOut.log" | \
         grep -v \
            -e 'oracle.jdbc.driver.DatabaseError.throwSqlException' \
            -e 'The Network Adapter could not establish the connectionDSRA0010E' \
            -e 'Error creating XA Connection and Resource com.ibm.ws.exception.WsException: DSRA8100E' \
            -e 'Error creating XA Connection and Resource java.security.PrivilegedActionException:' | \
         tail -10
      echo
      scriptContinue
   done
   echo
}
 
# Show two listings of FILE3DIR so the operator can judge file processing.
# Both are taken from "ls -ltr" (oldest first) and cut to the last 10 lines:
#   1. names that end in .txt
#   2. names that contain .txt anywhere
# Globals: FILE3DIR, HOSTNAME_SHORT, BOLD, BOLDOFF (read)
checkFiles-host3 () {
   local heading="$BOLD Checking the process on $HOSTNAME_SHORT $BOLDOFF "

   # Listing 1: names ending in .txt
   printf '%s\n' \
      "$heading" \
      "There should be no files ending on .txt older than one hour:" \
      "Last 10 files ending on .txt in $FILE3DIR:"
   ls -ltr "$FILE3DIR" | grep '\.txt$' | tail -10
   printf '\n'

   # Listing 2: names containing .txt
   printf '%s\n' \
      "$heading" \
      "There should be recent (last 24 hours) files:" \
      "Last 10 files in in $FILE3DIR:"
   ls -ltr "$FILE3DIR" | grep '\.txt' | tail -10
   printf '\n'

   scriptContinue
}
 
# List the JMS/MQ error directory and, in interactive mode, offer to empty it.
# Globals: JMSDIR, AUTOMATIC, HOSTNAME_SHORT, BOLD, BOLDOFF (read)
#          JMSACTION (written) - line count of the "ls -ltr" output
# When files are present the operator is handed to menuChoice, which offers
# "remove" and "continue" among its options.
checkFiles-host1 () {
   printf '%s\n' \
      "$BOLD Checking MQ process error files on $HOSTNAME_SHORT $BOLDOFF " \
      "Checking for jms (MQ) errors in $JMSDIR, there should be no files in this directory:"
   ls -ltr "$JMSDIR"

   # Outside interactive mode nobody can answer the menu: just leave a hint
   if [ "$AUTOMATIC" != 0 ]; then
      echo "AUTOMATIC mode is on. If there are any files run the script manually on $HOSTNAME_SHORT "
      return
   fi

   # More than one line of "ls -l" output is treated as "files are present"
   JMSACTION=$(ls -ltr "$JMSDIR" | wc -l)
   if (( JMSACTION > 1 )); then
      printf '%s\n' \
         "" \
         "${BOLD}There are files in this directory!$BOLDOFF If all files are really small ( < 100 bytes ) you can delete them. " \
         "   Would you like to do that right now?" \
         "" \
         "remove            - remove all files in $JMSDIR" \
         "continue          - continue with dailycheck" \
         ""
      menuChoice
   fi
   scriptContinue
}
 
# Function to check the Oracle alert log in the bdump directory for ORA- messages
# Globals read:    ORACLE_SID (exported by hostSpecific before this function is called),
#                  BOLD, BOLDOFF
# Globals written: ORALOGDIR (assigned without "local", so it stays set afterwards)
# The alert log is read through sudo and filtered by a sed program; according to the
# message printed below, the intent is to show every ORA- line together with the
# 2 lines before and after it (the sed program itself is not verified here).
# NOTE(review): "tail -10" keeps the last 10 LINES of the sed output, which is not
# necessarily the same as the last 10 ORA- messages announced to the operator.
checkLog-ora () {
   # The log directory depends on the SID selected for this host
   ORALOGDIR="/var/log/oracle/10.2/${ORACLE_SID}/bdump"
   echo "$BOLD Checking the Oracle logfile $ORALOGDIR/alert_$ORACLE_SID.log $BOLDOFF "
   echo "The last 10 ORA- messages are displayed, including the 2 lines before and the two lines after "
   # sudo cat: the file is read with elevated rights, the filtering runs as the calling user
   sudo cat $ORALOGDIR/alert_$ORACLE_SID.log | sed -e '
      1{$!N;$d;}
      $!N;/ORA-/!D
      $!N;$d;N;p
      g;$!N;$d;N;D
      '| tail -10
   echo
   scriptContinue
}
 
# Helper for checkLog-tomcat: for every recently modified logfile below
# $TOMCATDIR/app whose name matches the given pattern, print the file name
# followed by the lines of that file containing ERROR.
# Arguments: $1 - file name pattern passed to "find -name"
# Globals:   TOMCATDIR (read)
_tomcatErrors () {
   local logfile
   # -mtime -3 selects files modified less than three full days ago.
   # NOTE(review): the operator messages speak of "four days" - confirm
   # which window is intended.
   find "$TOMCATDIR/app/." -type f -name "$1" -mtime -3 -print |
      while IFS= read -r logfile; do
         # Printing the name here (instead of piping find's -print output
         # through grep) keeps it visible: a path does not contain "ERROR"
         # and would otherwise be filtered away with the non-error lines.
         echo "$logfile"
         grep ERROR "$logfile"
      done
}

# Function to check the tomcat application server logs for errors
# It evaluates the recently modified application* and framework* logfiles
# and shows every line containing ERROR, grouped per logfile.
# No known errors are filtered out here.
# Globals: TOMCATDIR, HOSTNAME_SHORT, BOLD, BOLDOFF (read)
checkLog-tomcat () {
   echo "$BOLD Checking the tomcat application server logs on $HOSTNAME_SHORT $BOLDOFF "
   echo "$BOLD Checking Tomcat logfiles: $BOLDOFF"
   echo "Checking the last four days of $TOMCATDIR/applicaton.log files"
   _tomcatErrors 'application*'
   echo
   echo "Checking the last four days of $TOMCATDIR/framework.log files"
   _tomcatErrors 'framework*'
   echo
   scriptContinue
}
 
# Show the error sub-menu: the menu banner, the current error listing and
# the two extra commands that act on the error log.
# The choice itself is read by the caller.
actionErrors () {
   menuStart
   checkErrors
   printf '%s\n' \
      "Note: The system clears all hardware errors automatically after 90 days, and all other errors after 30 days." \
      "" \
      "clearerrors       - clear all errors now" \
      "viewerrors        - review errors in less"
}
 
# Run the checks that belong to the host the script is running on.
# Globals: HOSTNAME_SHORT, ORACLE_SID_DB1, ORACLE_SID_DB2 (read)
#          ORACLE_SID (exported for the database hosts)
# The screen is cleared first; a host name that matches none of the known
# hosts results in no further action.
hostSpecific () {
   local host=$HOSTNAME_SHORT

   clear

   # host1: application logs, WebSphere logs and the JMS error directory
   if [[ $host == host1 ]]; then
      checkLog-abs
      checkLog-was
      checkFiles-host1
   fi

   # host2: Oracle alert log of the second database
   if [[ $host == host2 ]]; then
      export ORACLE_SID="$ORACLE_SID_DB2"
      checkLog-ora
   fi

   # host3: Oracle alert log of the first database, Tomcat logs, file processing
   if [[ $host == host3 ]]; then
      export ORACLE_SID="$ORACLE_SID_DB1"
      checkLog-ora
      checkLog-tomcat
      checkFiles-host3
   fi
}
 
# Clear the screen and print the two-line menu banner followed by a blank line.
menuStart () {
   clear
   printf '%s\n' \
      "########################################################################################################################" \
      "################################################### Daily Check Menu ###################################################" \
      ""
}
 
# Print the options of the default menu, framed by blank lines.
# Globals: TOMAIL, WHATAMI (read)
menuEnd () {
   printf '%s\n' \
      "" \
      "errors            - take further actions regarding errors" \
      "host              - start host specific checks" \
      "auto              - restarts the script and runs it automatically, after which the logfile is mailed to $TOMAIL " \
      "                  - this also works from the commandline: $WHATAMI auto " \
      "" \
      "exit              - exit" \
      ""
}
 
# Function to read the menu option from the operator and act on it
# This menu is used for all menus in the script, so it accepts the commands
# of the default menu, the error sub-menu and the JMS clean-up prompt.
# Globals: MENUCHOICE (written), BASEDIR, WHATAMI (read)
# Behaviour:
#   - an empty answer (or end of input) means "exit"
#   - "errors" shows the error sub-menu and prompts again
#   - unknown input prints "Wrong Input" and prompts again
#   - "auto" and "exit" terminate the script
#   - every other command runs once, after which the function returns
menuChoice () {
   # A loop (instead of the function calling itself) handles the re-prompts
   while :; do
      echo "Enter menu choice: [exit]"
      # -r keeps backslashes in the answer literal
      read -r MENUCHOICE

      if [ -z "$MENUCHOICE" ]; then
         MENUCHOICE="exit"
      fi

      case "$MENUCHOICE" in

      errors )
         # Show the sub-menu, then fall through to the next prompt
         actionErrors
      ;;

      host )
         hostSpecific
         return
      ;;

      clearerrors )
         clearErrors
         return
      ;;

      viewerrors )
         viewErrors
         return
      ;;

      auto )
         # Quoted so a script location containing whitespace still starts
         "$BASEDIR/$WHATAMI" auto
         exit
      ;;

      exit )
         exit
      ;;

      remove )
         removeJms
         return
      ;;

      continue )
         echo
         return
      ;;

      * )
         echo "Wrong Input"
      ;;

      esac
   done
}
 
# Function to mail the logfile of an automatic run
# Globals: LOGFILE, WHATAMI, HOSTNAME_SHORT, DATE, TOMAIL (read)
# The logfile becomes the mail body; the subject names the script, host and date.
mailLog () {
   # Redirect instead of "cat |", and quote the path so a logfile location
   # containing whitespace is still read correctly
   mail -s "Report $WHATAMI on $HOSTNAME_SHORT of $DATE" "$TOMAIL" < "$LOGFILE"
}
 
# Automatic mode: taken when the first command line argument is "auto"
# - switch off the pauses (AUTOMATIC=1) and the bold markers, so the logfile
#   stays readable
# - send all further output (stdout and stderr) to the logfile
# - run the error listing and the host specific checks, mail the logfile
#   and leave the script
if [ "$1" == "auto" ]; then
   AUTOMATIC=1
   BOLD=
   BOLDOFF=
   # Quoted so a logfile path containing whitespace is a valid redirect target.
   # Stop right away when the logfile cannot be opened: otherwise the checks
   # would write elsewhere and mailLog would not mail this run's output.
   exec > "$LOGFILE" 2>&1 || { echo "Cannot write to $LOGFILE" >&2; exit 1; }
   checkErrors
   hostSpecific
   mailLog
   exit
fi
 
# Interactive mode:
# Keep showing the menu until the operator leaves the script from within
# menuChoice. Every round prints the banner, the current error listing and
# the default options, then waits for a choice.
while true; do
   menuStart
   checkErrors
   menuEnd
   menuChoice
done
You could leave a comment if you were logged in.
dailycheckscript.txt · Last modified: 2021/09/24 00:24 (external edit)