package Smokeping::probes::IRTT;

=head1 301 Moved Permanently

This is a Smokeping probe module. Please use the command

C<smokeping -man Smokeping::probes::IRTT>

to view the documentation or the command

C<smokeping -makepod Smokeping::probes::IRTT>

to generate the POD document.

=cut

use strict;
use base qw(Smokeping::probes::basefork);
#use Data::Dumper;
use IPC::Open2 qw(open2);
use JSON::PP qw(decode_json);
use Path::Tiny qw(path);
use Scalar::Util qw(looks_like_number);
use Symbol qw(gensym);
use Time::HiRes qw(usleep gettimeofday tv_interval);

# Returns the hash of POD sections (name, description, authors) describing
# this probe.
sub pod_hash {
    return {
        name => <<DOC,
Smokeping::probes::IRTT - a SmokePing Probe for L<IRTT|https://github.com/heistp/irtt>
DOC
        description => <<DOC,
This SmokePing probe uses L<IRTT|https://github.com/heistp/irtt> to record
network round-trip time, one-way delay or IPDV (jitter), based on the value
of the B<metric> variable.

Additionally, the probe provides a results sharing feature, which allows
using results from a single IRTT run to record multiple metrics for a given
host at the same time. One target is defined with the B<writeto> variable
set, which selects the name of a temporary file to save the IRTT output to.
Additional targets are defined with the B<readfrom> variable set to the same
value, which, instead of running IRTT, wait for the main target's output to
become available, then parse it to record the chosen metric from the same
data. See the B<writeto> and B<readfrom> variables for more information.

=head2 WARNING

The results sharing feature (B<writeto> and B<readfrom> variables) requires
the number of B<forks> for the IRTT probe to be at least the total number of
IRTT targets defined (regardless of whether they have B<writeto> and
B<readfrom> set). Otherwise, there can be a deadlock while B<readfrom>
targets wait for their corresponding B<writeto> target to complete, which
may never start.
DOC
        authors => <<'DOC',
Pete Heist
DOC
    };
}

# Constructor. Delegates to the parent class, then (unless running as a CGI,
# detected via $ENV{SERVER_SOFTWARE}) runs "<binary> version" and dies if the
# command produces no output, the output is not recognized, or the reported
# version is older than 0.9.
sub new ($$$) {
    my $self = shift->SUPER::new(@_);

    # no need for this if we run as a cgi (still run at startup)
    unless ( $ENV{SERVER_SOFTWARE} ) {
        # check irtt version
        my $vout = `$self->{properties}->{binary} version`
            or die "ERROR: irtt version return code " . ($? >> 8);
        if ( $vout =~ /irtt version: (\d+)\.(\d+)\.(\d+)/ ) {
            if ( $1 == 0 && $2 < 9 ) {
                die "ERROR: unsupported irtt version: $1.$2.$3";
            }
        }
        else {
            die "ERROR: irtt version unexpected output: $vout";
        }
    }

    return $self;
}

# Returns the probe-level variable definitions: the parent's variables plus
# 'binary' (mandatory, must be an executable file) and 'tmpdir'.
sub probevars ($) {
    my $class = shift;
    my $pv = $class->_makevars($class->SUPER::probevars, {
        _mandatory => [ 'binary' ],
        binary => {
            _doc => "The location of your irtt binary.",
            _default => '/usr/bin/irtt',
            _example => '/usr/local/bin/irtt',
            _sub => sub {
                my $val = shift;
                return "ERROR: irtt 'binary' does not point to an executable"
                    unless -f $val and -x _;
                return undef;
            },
        },
        tmpdir => {
            _doc => "A temporary directory in which to place files for writeto/readfrom.",
            _default => '/tmp/smokeping-irtt',
        },
    });

    # TODO Delete probe timeout and define it per-target based on interval
    # (not ready yet as need to figure out logic in targetvars)
    #delete $pv->{timeout};

    return $pv;
}

# Returns the target-level variable definitions: the parent's variables plus
# the IRTT-specific ones documented below.
sub targetvars ($) {
    my $class = shift;
    my $tv = $class->_makevars($class->SUPER::targetvars, {
        dscp => {
            _doc => <<DOC,
The packet DSCP value to use (C<irtt client --dscp>). This is the same as the
classic one byte IP ToS field, but on the modern Internet, typically only the
lower 6 bits are used, and this is called the DSCP value. The upper two bits
are reserved for ECN. Hex may be used if prefixed by C<0x>.
DOC
            _example => '46',
            _re => '(\d+|0x[0-9a-fA-F]{1,2})',
        },
        extraargs => {
            _doc => <<DOC,
Extra arguments to C<irtt client> (see the C<irtt client> documentation).
Multiple arguments are separated by whitespace. B<Be careful> with extra
arguments, as some can corrupt the results.
DOC
            _example => '--ttl=32',
        },
        fill => {
            _doc => <<DOC,
The fill to use for the payload of request packets (C<irtt client --fill>).
The B<length> variable must be large enough so there's a payload to fill. Use
rand for random fill, or see the C<irtt client> documentation for more
options.
DOC
            _example => 'rand',
        },
        hmac => {
            _doc => <<DOC,
The HMAC key to use when sending packets to the server
(C<irtt client --hmac>).
DOC
            _example => 'opensesame',
        },
        interval => {
            _doc => <<DOC,
The interval between successive requests (C<irtt client -i>, but the unit is
always seconds (s)).

B<WARNING:> If B<interval> is increased to greater than 5 seconds, the
B<timeout> (which defaults to B<pings> * 5 seconds + 1) must be modified so
that SmokePing doesn't kill the probe prematurely. Additionally, B<interval>
must not be increased such that B<pings> * B<interval> is greater than
B<step>. For example, at B<step>=300 and B<pings>=20, the B<interval> must not
be greater than 15 seconds, but should preferably be less to account for
handshake and packet wait times.
DOC
            _example => 1.5,
            _default => 1,
            _re => '(\d*\.)?\d+',
        },
        ipversion => {
            _doc => <<DOC,
The IP version to use for packets, 4 or 6 (C<irtt client -4> or
C<irtt client -6>). By default the IP version is chosen based on the supplied
host variable.
DOC
            _example => 6,
            _re => '^(4|6)$',
        },
        length => {
            _doc => <<DOC,
The length (size) of the packet (C<irtt client -l>). The length includes IRTT
headers, but not IP or UDP headers. The actual packet length is increased to
accommodate the IRTT headers, if necessary. Header size as of IRTT 0.9.0 as
used in SmokePing is 48 bytes when B<writeto> is set (since both monotonic and
wall clock values are requested) and 40 bytes otherwise.
DOC
            _example => 172,
            _re => '\d+',
        },
        localaddr => {
            _doc => <<DOC,
The local address to bind to when sending packets (C<irtt client --local>).
See the C<irtt client> documentation's Host formats for valid syntax.
DOC
            _example => '192.168.1.10:63814',
        },
        metric => {
            _doc => <<DOC,
The metric to record, one of:

=over

=item *

rtt: round-trip time

=item *

send: one-way send delay I<(requires external time synchronization)>

=item *

receive: one-way receive delay I<(requires external time synchronization)>

=item *

ipdv: IPDV (instantaneous packet delay variation, or jitter)

=item *

send_ipdv: IPDV for sent packets

=item *

receive_ipdv: IPDV for received packets

=back

Note that the C<send> and C<receive> metrics require accurate external system
clock synchronization, otherwise the values from one will be abnormally high
and the other will be abnormally low or even negative, in which case the value
0 will be given SmokePing. It is recommended to install ntp on both the
SmokePing client and IRTT server. Properly configured NTP may be able to
synchronize time to within a few milliseconds, which is usually enough to
provide useful results. PTP over a LAN may achieve microsecond-level accuracy.
For best results between geographically remote hosts, GPS receivers may be
used. Since C<send_ipdv> and C<receive_ipdv> measure the variation in times
between successive packets, and since C<rtt> and C<ipdv> use monotonic clock
values on the client side only, external time synchronization is not required
for these metrics.
DOC
            _default => 'rtt',
            _re => '^(rtt|send|receive|ipdv|send_ipdv|receive_ipdv)$',
        },
        readfrom => {
            _doc => <<DOC,
The name of a file to read results from, instead of running IRTT. Use in
combination with B<writeto> to use the results from one IRTT run to record
multiple metrics. The value will become the name of a file in B<tmpdir>, and
must be the same as another target's setting for B<writeto>. Multiple targets
may use the same value for B<readfrom>, but B<writeto> and B<readfrom> may not
be both set for a given target. When B<readfrom> is set, any variables that
affect C<irtt client> are ignored because IRTT is not being invoked,
including: B<dscp>, B<extraargs>, B<fill>, B<hmac>, B<interval>,
B<ipversion>, B<length>, B<localaddr> and B<serverfill>. These values are only
relevant in the corresponding B<writeto> target.

Note that the B<host> variable must still be defined for targets that define
B<readfrom>, otherwise the target won't be used.

When using this feature, be sure to have at least as many B<forks> for the
IRTT probe as you have total IRTT targets defined. See the WARNING section
for more information.
DOC
            _example => 'irtt1',
        },
        readfrompollinterval => {
            _doc => <<DOC,
The integer interval in seconds on which to poll for results when B<readfrom>
is set. Lower numbers will allow B<readfrom> to see the results a bit sooner,
at the cost of higher CPU usage. Polling does not begin until the soonest time
at which the IRTT client could have terminated normally.
DOC
            _default => 5,
            _re => '[1-9]\d*',
            _example => '2',
        },
        serverfill => {
            _doc => <<DOC,
The fill to use for the payload of reply packets from the server
(C<irtt client --sfill>). The B<length> variable must be large enough to
accommodate a payload. Use C<rand> for random fill, or see the C<irtt client>
documentation for more options.
DOC
            _example => 'rand',
        },
        sleep => {
            _doc => <<DOC,
The amount of time to sleep before starting requests or processing results
(a float in seconds).
DOC
            _example => '0.5',
            _re => '(\d*\.)?\d+',
        },
        writeto => {
            _doc => <<DOC,
The name of a file to write results to. Use in combination with B<readfrom>
to use the results from this IRTT run to record multiple metrics. The value
will become the name of a file in B<tmpdir>, and any targets with B<readfrom>
set to the same value will use this target's results. There must be only one
target with B<writeto> set for a given file, and B<writeto> and B<readfrom>
may not be both set for a given target.

When using this feature, be sure to have at least as many B<forks> for the
IRTT probe as you have total IRTT targets defined. See the WARNING section
for more information.
DOC
            _example => 'irtt1',
        },
    });

    # TODO Here I would like to be able to set the target-specific timeout
    # based on the interval and number of pings, but I'm currently unable to
    # get the number of pings in this method, before I have a value for target.
    #my $pings = $tv->{pings} ? $tv->{pings} : $class->SUPER::pings();
    #$tv->{timeout} = $tv->{interval} * $pings + 5;

    return $tv;
}

# Returns the short human-readable description of this probe.
sub ProbeDesc ($) {
    my $self = shift;
    return "IRTT round-trips";
}

# Waits for the results file named by the target's 'readfrom' variable to
# appear in the probe's 'tmpdir', then returns its contents.
#
# Sleeps for the optional 'sleep' variable, then for interval * pings + 2
# seconds (the earliest the writing target could be done), then polls every
# 'readfrompollinterval' seconds. Dies if more than $self->step seconds have
# passed since the call started and the file still does not exist.
sub get_json_from_file ($$) {
    my $self   = shift;
    my $target = shift;
    my $tv     = $target->{vars};
    my $fname  = path($self->{properties}->{tmpdir}, $tv->{readfrom});

    # mark start
    my $t0 = [gettimeofday];

    # sleep, if requested
    usleep($tv->{sleep} * 1000000) if $tv->{sleep};

    # wait for earliest possible finish, then poll at readfrompollinterval
    sleep($tv->{interval} * $self->pings($target) + 2);
    until (-f $fname) {
        # die if step elapsed, which should never happen as we should
        # be killed by smokeping's timeout sooner than this
        if (tv_interval($t0, [gettimeofday]) > $self->step) {
            die("ERROR: step elapsed and $fname not found");
        }
        sleep $tv->{readfrompollinterval};
    }

    # return file contents
    return $fname->slurp;
}

# Runs "irtt client" against the target and returns its JSON output.
#
# The command line is built from the target variables; any 'extraargs' are
# split on whitespace and placed before the host address. When 'writeto' is
# set the output (possibly empty, on error) is also written to that file in
# 'tmpdir'. Dies if irtt exits with a non-zero status.
sub run_irtt ($$) {
    my $self   = shift;
    my $target = shift;
    my $tv     = $target->{vars};
    my $p      = $self->{properties};

    # choose clock for requested metric
    my $clock;
    if ($tv->{writeto}) {
        # readers of the shared file may want any metric, so request both
        $clock = 'both';
    }
    else {
        $clock = $tv->{metric} =~ /(send|receive)/ ? 'wall' : 'monotonic';
    }

    # build command
    my $count    = $self->pings($target);
    my $interval = $tv->{interval};
    my $duration = $interval * $count;
    my @cmd = (
        $p->{binary}, 'client',
        '-i', $interval . 's',
        '-d', $duration . 's',
        '-Q',
        '--clock=' . $clock,
        '--tstamp=midpoint',
        '--stats=none',
        '-o', '-',
    );
    push @cmd, ("-l", $tv->{length}) if $tv->{length};
    push @cmd, "--hmac=" . $tv->{hmac} if $tv->{hmac};
    push @cmd, "--dscp=" . $tv->{dscp} if $tv->{dscp};
    push @cmd, "--fill=" . $tv->{fill} if $tv->{fill};
    push @cmd, "--sfill=" . $tv->{serverfill} if $tv->{serverfill};
    push @cmd, "--local=" . $tv->{localaddr} if $tv->{localaddr};
    push @cmd, "-$tv->{ipversion}" if $tv->{ipversion};
    # extraargs is a documented target variable; pass it through so that it
    # is not silently ignored (flags must come before the host argument)
    push @cmd, split(' ', $tv->{extraargs})
        if defined $tv->{extraargs} && $tv->{extraargs} =~ /\S/;
    push @cmd, $target->{addr};

    # sleep, if requested
    usleep($tv->{sleep} * 1000000) if $tv->{sleep};

    # execute irtt
    $self->do_debug("Executing @cmd");
    my $inh  = gensym;
    my $outh = gensym;
    my $pid  = open2($outh, $inh, @cmd);
    my $out  = do { local $/; <$outh> };
    waitpid $pid, 0;
    # capture the exit status right away, before close or the file write
    # below get a chance to overwrite $?
    my $status = $? >> 8;
    close $inh;
    close $outh;

    # write json output atomically if writeto set (empty for errors)
    if ($tv->{writeto}) {
        path($p->{tmpdir}, $tv->{writeto})->spew($out);
    }

    # die on non-zero status codes
    die "ERROR: irtt client return code $status" if $status;

    return $out;
}

# Converts nanoseconds to seconds.
sub nstos ($) {
    my $ns = shift;
    return $ns / 1000000000.0;
}

# Returns the median of a list of numbers (the mean of the two middle values
# for an even count), or 0 for an empty list.
sub median {
    my @vals = sort { $a <=> $b } @_;
    my $len  = @vals;
    return 0 unless $len;
    my $mid = int($len / 2);
    return $vals[$mid] if $len % 2;
    return ($vals[$mid - 1] + $vals[$mid]) / 2;
}

# Measures one target and returns the sorted list of times, in seconds, for
# the target's chosen metric.
#
# The JSON comes from a fresh irtt run, or from the shared results file when
# 'readfrom' is set. Only round trips whose 'lost' field is 'false' and whose
# value is numeric are counted. Negative delays are reported as 0 and IPDV
# values are reported as absolute values.
#
# Dies when both 'writeto' and 'readfrom' are set, when the temp directory
# cannot be created, when the JSON is empty or cannot be decoded, or when
# the metric is not one of the known names.
sub pingone ($$) {
    my $self   = shift;
    my $target = shift;
    my $tv     = $target->{vars};
    my $p      = $self->{properties};

    # if writeto set, create temp directory or remove temp file
    if ($tv->{writeto}) {
        if ($tv->{readfrom}) {
            die("ERROR: writeto and readfrom must not both be set for the same target");
        }
        my $d = $p->{tmpdir};
        if (-d $d) {
            # remove any stale results so readers don't pick up an old run
            path($d, $tv->{writeto})->remove;
        }
        elsif (!mkdir $d) {
            # another fork may have created the directory in the meantime,
            # so only fail if it still does not exist
            my $err = $!;
            die("ERROR: unable to create temp dir $d ($err)") unless -d $d;
        }
    }

    # get json from irtt, or file if readfrom set
    my $json = $tv->{readfrom}
        ? get_json_from_file($self, $target)
        : run_irtt($self, $target);
    die("ERROR: json content empty") if !defined $json || $json eq "";

    # decode json; decode_json throws on malformed input, so trap the error
    # and report it with the parser's own message
    my $dec = eval { decode_json($json) };
    die "ERROR: decode_json failed: $@" if $@ || !$dec;

    # key under 'ipdv' holding the value for each of the ipdv metrics
    my %ipdv_key_for = (
        ipdv         => 'rtt',
        send_ipdv    => 'send',
        receive_ipdv => 'receive',
    );

    # get times for chosen metric from json
    my $metric = $tv->{metric};
    my @times;
    for my $rt ( @{ $dec->{'round_trips'} || [] } ) {
        next unless $rt->{'lost'} eq 'false';

        my $ns;
        if ($metric =~ /^(rtt|send|receive)$/) {
            $ns = $rt->{'delay'}->{$metric};
            # negative delays come from unsynchronized clocks; report 0
            $ns = 0 if $ns < 0;
        }
        elsif (exists $ipdv_key_for{$metric}) {
            $ns = $rt->{'ipdv'}->{ $ipdv_key_for{$metric} };
        }
        else {
            die("ERROR: impossible metric $tv->{metric}");
        }

        push @times, nstos(abs($ns)) if looks_like_number($ns);
    }

    # push an extra median value for ipdv, which has one fewer values
    # than pings, so there isn't a lost packet reported
    if ($metric =~ /ipdv/ && @times > 0) {
        push @times, median(@times);
    }

    my @sorted = sort { $a <=> $b } @times;
    return @sorted;
}

1;