#!/usr/bin/env perl

## $Id$
## this script collects information on server activity, such as:
## CPU, network I/O, memory usage ...
## modified to get rid of rperf.
## J-Y Nief - CCIN2P3 - 24/05/04
## modified to accept additional options and drop multi-home uf's.
#  Andy Hanushevsky - SLAC - 15/07/04
#
## rewrite the network part
## get rid of the netstat daemon, causing zombies proliferation
## fix the netstat parameters positions for both linux and sunos
## added debug mode -d
# Fabrizio Furano - CERN - 20/05/09
#
# Usage: XrdOlbMonPerf [-I netintf] [-n netspeed] [-m <shmkey>] [-d] [<interval>]
# WARNING: if you are running this script on a Linux RH7.x server, 
#          the network metric is disabled as the netstat output is screwed, 
#          this info is useless. no problem for RH9 and RHEL3.

# get the name of the server + platform
undef($shmid);
$shmkey = '';
$rhost  = `/bin/uname -n`; chomp($rhost);
$enBps  = 131072000;
$systype= `/bin/uname`; chomp($systype);
$repval = 0;
$netif  = '';
$netmax = 0; # bits / s
$monint = 60;
$debug = 0;
$precsumio = -1;
$| = 1;                      # unbuffer STDOUT

# Get options
#
while(($argval = shift) && (substr($argval,0,1) eq '-')) {
    if ($argval eq '-I') {
	Log("Network interface not specified.") if !($netif=shift);
    }
    elsif ($argval eq '-n') {
	Log("Network speed not specified.") if !($netmax=shift);
    }
    elsif ($argval eq '-m') {
	Log("Shared segment not specified.") if !($shmkey=shift);
	$shmid = shmget($shmkey, 4096, 0744) if (shmkey ne '.');
	Log("Unable to attach shared memory; $!") if !defined($shmid);  
    }
    elsif ($argval eq '-d') {
	Debug("Debug mode enabled.");
	$debug = 1;
    }
    else {
	Log("Unknown option, $argval, ignored", 1);
    }
}


Debug("Output is <load%> <cpu%> <mem%> <pgio%> <net_io%>") if ($debug == 1);

# Get the interval
#
# $argval holds the first non-option argument, if any. The interval must be a
# strictly positive integer: it is used as a divisor for the network rate and
# as the base of the sleep time in the sampling loop, so an all-zero value
# such as '00' is rejected here instead of crashing later.
if ($argval) 
   {$monint = $argval;
    Log("Invalid interval, '$monint'") if ($monint !~ /^\d+$/ || $monint < 1);
   }

Debug("Monitoring interval: '$monint'") if ($debug == 1);

# get the number of CPUs on this server
#
# $numCPU is used as a divisor when the run-queue load is normalised, so it
# starts at 1 and is only replaced by a probe result that is a positive
# number. Platforms handled by neither branch keep the default.
$numCPU = 1;

if ( $systype eq 'Linux' ) {
  # the aggregate 'cpu' line of /proc/stat matches as well, hence the - 1
  $cpuCount = `grep -c cpu /proc/stat` - 1;
  $numCPU = $cpuCount if ($cpuCount >= 1);
}

if ( $systype eq 'SunOS' ) {
  $resp = `uname -X | grep NumCPU`;
  chomp($resp);
  # the value is whatever follows '= ' on the NumCPU line
  @answer = split('= ', $resp);
  $numCPU = $answer[1]
    if (defined($answer[1]) && $answer[1] =~ /^\d+$/ && $answer[1] >= 1);
}

Debug("Number of CPUs: '$numCPU'") if ($debug == 1);

# Determine the total memory of the server and keep it in $TotMem.
if ($systype eq 'Linux') {
  # second whitespace-separated field of the MemTotal line, scaled by 1024
  $resp   = `grep MemTotal /proc/meminfo`;
  $TotMem = (split('\s+', $resp))[1] * 1024;
}
elsif ($systype eq 'SunOS') {
  # first number after the colon on the prtconf 'Memory' line, scaled by 1024*1024
  $resp     = `/etc/prtconf | grep Memory`;
  ($TotMem) = $resp =~ /.+:\s*(\d+)/s;
  $TotMem   = $TotMem * (1024*1024);
}

Debug("Total memory: '$TotMem'") if ($debug == 1);

# if it is a Linux, retrieve the distrib version.
# necessary for netstat which does not give the same output on RH 7.x, RH 9, 
# RHEL...
# netstat usable on RH 9 and RHEL 3 ==> distrib = "RH_OK", else "RH_WRONG"
#
# Only the old "Red Hat Linux release 7.x" series is flagged: RHEL 7 and
# CentOS 7 release strings also contain "release 7." but must stay "RH_OK",
# otherwise the network metric would be disabled on them.
if ($systype eq 'Linux') {
  if ( -r '/etc/redhat-release' ) {
      $resp = `cat /etc/redhat-release`;
  }
  elsif ( -r '/etc/debian_version' ) {
      # Debian keeps its version in /etc/debian_version (underscore)
      $resp = "debian".`cat /etc/debian_version`;
  }
  else {
      $resp = "";
  }

  $distrib = ClassifyRelease($resp);
}

Debug("Distro: '$distrib'") if ($debug == 1);

# Map the content of a release file to "RH_WRONG" (Red Hat Linux 7.x) or
# "RH_OK" (anything else, including an empty or undefined string).
sub ClassifyRelease {
  my($release) = @_;
  return "RH_WRONG"
    if (defined($release) && $release =~ /Red Hat Linux release 7\./);
  return "RH_OK";
}

# Find, in the second header line of the 'vmstat' output, the positions of
# the 'id' (idle time) and 'free' (free memory) labels. The positions vary
# with the platform and the 'vmstat' version.
@output = `vmstat`;
chomp($output[1]);
@answer = split('\s+', $output[1]);

# A label that is absent yields the number of header fields, i.e. one past
# the last column.
$ind_idle = scalar(@answer);
$ind_free = scalar(@answer);

# Walk the header right to left so that the leftmost occurrence of each
# label is the one that sticks.
for my $col (reverse(0 .. $#answer)) {
  $ind_idle = $col if ( $answer[$col] eq 'id' );
  $ind_free = $col if ( $answer[$col] eq 'free' );
}

Debug("vmstat output. Idle is col: '$ind_idle'. Free mem is col: '$ind_free'") if ($debug == 1);

# Column positions in the 'netstat -i' output. The MTU is always in column 1;
# the input and output packet counters sit in different columns on SunOS
# than on the other platforms (Linux layout).
$ind_mtu = 1;
($ind_net1, $ind_net2) = ($systype eq 'SunOS') ? (4, 6) : (3, 7);
$indx_net = 0;

Debug("netstat output. Input pkt is col: '$ind_net1'. Output pkt is col: '$ind_net2'") if ($debug == 1);

# Guess the capacity of the network interfaces (ethernet or gigabit) when no
# speed was supplied with -n.
$mtu = 1500;
unless ($netmax) {
    # NOTE(review): the interface name is glued directly onto '-i'; confirm
    # that the local netstat accepts that form when -I is used.
    for my $entry (`netstat -i$netif`) {
        # header lines carry no interface name
        next if ( $entry =~ /Kernel Interface/ || $entry =~ /MTU/ );

        # a later matching line overrides an earlier one
        $netmax = 1e7 if ( $entry =~ /eth/ );
        $netmax = 1e8 if ( $entry =~ /ge/ || $entry =~ /ce/ );
    }
    $netmax = 1e8 unless ($netmax);

    # 10/100 cards are rare nowadays, so Linux always starts from 1e8; the
    # right value can still be declared with the -n switch.
    $netmax = 1e8 if ($systype eq 'Linux');
}

Debug("netmax is $netmax") if ($debug == 1);

# No netstat daemon pipe is opened by this script, so there is no start-up
# output to discard before sampling begins; reading from the never-opened
# CMDFD handle only produced undefined values.
$ipio_old = 0;
# Main sampling loop. Each pass measures the run-queue load, the CPU, memory
# and network utilisation (all capped to 0..100) and reports them either as
# one line "<load> <cpu> <mem> <pgio> <net_io>" on STDOUT or, when a shared
# memory segment is attached ($shmid defined), as five packed 'L' values
# written at offset 0 of that segment. The loop never terminates on its own;
# a failed shared memory write is fatal (Log exits).
while(1) {

  # what's the CPU utilization ?
  # Line index 3 of 'vmstat 1 2' is used as the current sample; it is split
  # exactly like the header line so that $ind_idle / $ind_free line up.
  @respCPU = `vmstat 1 2`;
  chomp $respCPU[3];
  @resp_vmstat = split('\s+',$respCPU[3]);
  if ( ($resp_vmstat[$ind_idle] > 100) || ($resp_vmstat[$ind_idle] < 0) ) {
    # implausible idle value: report no CPU usage rather than garbage
    $cpu = 0;
  } else {
    # sum of the two columns immediately before the idle column
    # (presumably user and system time -- verify against the vmstat header)
    $cpu = $resp_vmstat[$ind_idle-2] + $resp_vmstat[$ind_idle-1];
  }

  # what's the runq load ?
  # The load averages are located by pattern instead of by comma position:
  # the number of comma-separated fields printed by 'uptime' depends on
  # whether the machine has been up for more than a day. The third (last)
  # average is used.
  chomp($respLoad = `uptime`);
  if ($respLoad =~ /load averages?:\s*([\d.]+),?\s+([\d.]+),?\s+([\d.]+)/) {
    $loadavg = $3;
  } else {
    $loadavg = 0;
  }
  Debug("uptime reports load5:$loadavg") if ($debug == 1);

  # never divide by an unknown or zero CPU count
  $cpus = ($numCPU > 0) ? $numCPU : 1;
  $load = int($loadavg*100/$cpus);
  $load = 100 if $load > 100;

  # what's the network I/O activity?
  @resp = `netstat -i`;

  if ($distrib ne 'RH_WRONG') {
      $sumio = 0;
      foreach $line (@resp) {
          Debug("$line") if ($debug == 1);

          # skip the header lines
          next if ( $line =~ /Iface/ || $line =~ /Kernel/ || $line =~ /Name/ );

          @respSplit = split(' ',$line);
          next if (!@respSplit);          # blank line

          $mtu = $respSplit[$ind_mtu];
          $mtu = 1500 if $mtu == 0;

          # The interface name is field 0. Loopback interfaces are left out.
          if ($respSplit[0] !~ /^lo/) {
              Debug("$respSplit[0] - Input pkts: $respSplit[$ind_net1] Output pkts: $respSplit[$ind_net2] Mtu: $mtu") if ($debug == 1);
              # traffic estimate: packet counters times the MTU
              $sumio += ($respSplit[$ind_net1] + $respSplit[$ind_net2]) * $mtu;
          }
      }

      Debug("Summed network traffic: $sumio") if ($debug == 1);

      # first pass: no previous sample, so the activity is reported as 0
      $precsumio = $sumio if ($precsumio < 0);
      $net_activity = ($sumio - $precsumio) / $monint;
      Debug("net_activity: $net_activity") if ($debug == 1);

      $precsumio = $sumio;
  }

  
  # This is to support 1G, 10G cards adaptively, or a number of cards together
  # In practice, we guess the max throughput of the network system
  # in order to calculate a decent percent utilization estimation
  # in the case the initial guess is completely wrong
  $netmax = $netmax * 2 if ( $net_activity > ($netmax * 2.5) );

  # We have to divide by two, but I really do not understand why
  # ... this was also in the previous implementation and it works
  # (a non-numeric or zero $netmax yields 0 instead of a division by zero)
  $ipio = ($netmax > 0) ? int ( $net_activity / $netmax * 100 / 2 ) : 0;


  Debug("Calculated ipio: $ipio") if ($debug == 1);
  $ipio = 100 if ($ipio > 100);
  # counters that went backwards would give a negative value, which cannot
  # be represented by the unsigned pack format used below
  $ipio = 0 if ($ipio < 0);


  # what's the memory load ?
  $used = $TotMem - $resp_vmstat[$ind_free]*1024;
  $mem = ($TotMem > 0) ? int($used/$TotMem*100) : 0;
  $mem = 100 if $mem > 100;
  $mem = 0 if $mem < 0;
  #
  $repval++;

  # what's the paging I/O activity ?
  # useless as this metric is highly correlated with some of the others above.
  # being kept for backward compatibility with the load balancer.
  $pgio = 0;

  if (!defined($shmid)) {
    print "$load $cpu $mem $pgio $ipio\n";
  }
  else {
    $resp = pack('LLLLL',$load,$cpu,$mem,$pgio,$ipio);
    Log("Unable to write into shared memory; $!")
      if !shmwrite($shmid, $resp, 0, length($resp));
  }

  # one second less than the interval: 'vmstat 1 2' above already waits
  sleep($monint-1);
}


# Report a message on STDERR and through /bin/logger (daemon.notice).
#   $_[0]  message text
#   $_[1]  when true, return 0 to the caller; otherwise the script exits
#          with status 1 after reporting.
sub Log {
  my $text       = shift;
  my $keep_going = shift;
  my $entry      = "XrdOlbMonPerf: $text";

  print STDERR "$entry\n";
  system('/bin/logger', '-p', 'daemon.notice', $entry);

  exit(1) unless $keep_going;
  return 0;
}

# Write one prefixed diagnostic line to STDERR.
#   $_[0]  message text; further arguments are ignored.
sub Debug {
    my $text  = shift;
    my $entry = "XrdOlbMonPerf: $text\n";
    print STDERR $entry;
}
