#!/usr/bin/perl -w

# Scribot, the irc blogging bot
# version 2.0

# originally based on POE::Component::IRC
# demonstration script by dennis taylor <dennis@funkplanet.com>
# 
# written by leon brocard <acme@astray.com>
#
# patched by simon wistow <simon@thegestalt.org> to
#  - tell the blogger if they're putting a duplicate in
#  - allow them to override
#  - allow searching
#  - be more sane about white space
#  - use URI::Find
#  - allow the use of , or : or - as ways of addressing the bot
#  - no hardcoded stuff (sort of)

####
# TODO
#
# o get rid of the file and make it all DB based
# o fix the web front end to use that
# o add searching on the web frontend
# o store dates and times
#
####


# instructions :
# this is a sample session. The comments in [square brackets]
# tell you what was actually stored in the logs. The bot here 
# is 'scrinew' and the user is 'muttley' (which is me :)

# <@muttley> scrinew, http://www.foo.com
# [ muttley : http://www.foo.com ] 

# <@muttley> scrinew, http://www.foo.com
# < scrinew> Sorry, muttley, That's been blogged before
# [ this also sends a privmessage telling muttley how to override ]

# <@muttley> scrinew, override http://www.foo.com
# [ muttley : http://www.foo.com ]

# <@muttley> scrinew, override http://www.bar.com
# [ muttley : override http://www.bar.com ]

# <@muttley> scrinew, got http://www.foo.com
# < scrinew> that's been blogged 2 times
# <@muttley> scrinew, got http://www.bar.com
# < scrinew> that's been blogged 1 time
# <@muttley> scrinew, got http://www.baz.com
# < scrinew> nope, that's a new one to me
# <@muttley> scrinew, got www.baz.com
# < scrinew> nope, that's a new one to me
#  <@muttley> scrinew, got baz.com
# [ muttley : got baz.com ]

# <@muttley> scrinew, override is a word
# [ muttley : override is a word ]




use strict;
use lib '/virtual/astray.com/www/lib/';
use HTML::Entities;
use POE::Kernel;
use POE::Session;
use POE::Component::IRC;
use URI::Find;
use GDBM_File;
use Fcntl qw(O_CREAT O_RDWR);


$| = 1; # no need to buffer STDOUT


####
#
# CONFIGURATION OPTIONS
# (season to taste)
#
####


# these should possibly go in a config file

my $nick    = "scrinew";               # the nick of the bot, obviously
my $channel = "#scritest";             # and the channel it will sit on
my $blog    = "scribot.blog";          # where the blogging file is
my $urldb   = "url.db";                # where the url store will go
my $log     = "scribot.log";           # where the log file will go
my $server  = "london.rhizomatic.net"; # the irc server
my $DEBUG   = 0;                       # some debugging options


# load all the urls into a hash for fast lookup
# first, we check to see if the file existed before

my $existed = (-f $urldb);

# now we tie the DB to the urls
tie my %urls,  'GDBM_File',  $urldb,  O_CREAT|O_RDWR, 0600;


# now, if this is the first time doing this then 
# we schlurp the URLS from the blog file into the DB

goto SKIP if ($existed);

open (BLOG, "$blog") || die "Couldn't open '$blog' for reading : $!\n";
while (<BLOG>)
{
    
	
    find_uris($_,
            sub {
                my($uri, $orig_uri) = @_;
		$urls{$uri}++;
                return $uri;
           });


}
close BLOG;

SKIP:


# open up the log and blog files
open(LOG, "> $log")    || die "Couldn't open '$log' for writing : $!\n";
open(BLOG, ">> $blog") || die "Couldn't open '$blog' for appending : $!\n";


select BLOG;
$| = 1;  # no need to buffer to the BLOG file
select STDOUT;

# This gets executed as soon as the kernel sets up this session.
sub _start {
  my ($kernel, $session) = @_[KERNEL, SESSION];

  # Uncomment this to turn on more verbose POE debugging information.
  # $session->option( trace => 1 );

  # Make an alias for our session, to keep it from getting GC'ed.
  $kernel->alias_set( 'smileyninja' );

  # Ask the IRC component to send us all IRC events it receives. This
  # is the easy, indiscriminate way to do it.
  $kernel->post( 'test', 'register', 'all');

  # Setting Debug to 1 causes P::C::IRC to print all raw lines of text
  # sent to and received from the IRC server. Very useful for debugging.
  $kernel->post( 'test', 'connect', { Debug    => 0,
				      Nick     => $nick,
				      Server   => $server,
				      Port     => 6667,
				      Username => $nick,
				      Ircname  => "$channel blogger", }
			   );
}


# After we successfully log into the IRC server, set our nick and join a channel.
sub irc_001 {
  my ($kernel) = $_[KERNEL];

  $kernel->post( 'test', 'mode', $nick, '+i' );
  $kernel->post( 'test', 'join', $channel );
}


# should probably get rid of this
sub irc_dcc_send {
  my ($nick, $port, $file, $size, $done) = @_[ARG0 .. $#_];

  printf LOG "DCC SEND to $nick ($file): $done bytes of $size sent.   %d%%\n",
	($done / $size) * 100;
}


sub _default {
  my ($kernel, $state, $event, $args) = @_[KERNEL, STATE, ARG0, ARG1];

  $args ||= [];
  print LOG "$state -- $event @$args\n" if ($DEBUG);

  # munge the From name
  my $from = $args->[0];
  $from =~ s|!.+$||;

  # and set the person we'll reply to
  my $to = $from;

  # then grab the body of the text
  my $text = $args->[2] || "";

  # if we're being karmad then randomly karma the person back
  if ($text =~ /^\s*$nick\+\+/) 
  {
    show($kernel, $channel, $from . "++ # hey, thanks yourself!") if (rand() < .5);
    return;
  }

  # no privmsging - all blogging must be done in private
  return unless $event eq 'irc_public';

  # ignore everything that's not addressed to us
  return unless $text =~ /^\s*$nick\s*[,:-]/;

  
  # return if it's one of the bots, or ourselves
  # this should be in a config file
  return if $from eq  $nick;
  return if $from eq 'dipsy';
  return if $from eq 'dadadodo';
  return if $from eq 'lala';


  # remove our nick from the start of the line
  $text =~ s/^\s*$nick\s*:?,?-?\s?\s*//;

  # now see if it starts with override or got which
  # are our commands. This shoudl probably be rewritten
  # to be more easily extensible
  $text =~ s/^(override|got\s*)//;

  # do we have a command or not
  my $command = $1 || ""; 

  $to = "$channel";

  # $text =~ s/\?$//; # remove a question mark from the end of the line


  print LOG "got from $from: $text\n" if ($DEBUG);


  # state variables
  my $seenbefore = 0;
  my $goturl = 0; 

  # check to see if there's a url on the line
  # this fails if there are two URIs on the line
  # c'est la vie, patches to fix this welcome
  find_uris($text,
                   sub {
                       my($uri, $orig_uri) = @_;
		       $goturl++;                 # indicate that there's a URL (I really hate the URI::Find interface)
		       $seenbefore = $urls{$uri}; # and work out how many we've seen before 
                                                
						  # don't inc the number of times we'e seen this if ...
 						  #  - we've seen it before and the command is empty (i.e not overriding)
						  #  - we're searching
                       $urls{$uri}++ unless (($command eq "" && $seenbefore) || $command =~ /^got/);

                       return $orig_uri;
                   });


  # if we're searching and there *was* a URI on the line ...
  if ($command =~ /^got/ && $goturl) 
  {
        # construct a message and send it back
	my $message;

	if ($seenbefore) {
		$message = "that's been blogged $seenbefore time";
		$message .= "s" if ($seenbefore>1);
	} else {
		$message = "nope, that's a new one to me";
	}

	show($kernel, $channel, "$message");

  # if we're not overriding and we've seen it before, reject it
  } elsif ($seenbefore && $command !~ /^override/) {

	# send a message to channel
	show($kernel, $channel, "Sorry, $from, That's been blogged before");
	# and privmsg them with instructions
      	$kernel->post('test', 'privmsg', $from, 'You can tell me to put it in anyway by saying "override '.$text.'"');

  } else {

	# we've seen it before (but we're obviously overriding)
	if ($seenbefore)
	{
		$command = "";       # blank the command
		$text   =~ s/^\s*//; # remove whitespace from the start of the text
	}

	# blog it with reinserted command
        # this is so that we can write 'override $url' or 'override is a word'
        # and if the url hasn't been seen before then it just works
  	print BLOG "$from: $command$text\n"; 
  }
}


# munge the text for length and stuff
sub show {
  my($kernel, $what, $text) = @_;

  if (not $text) {
      $text = $what;
      $what = $channel;
  }

  chomp $text;
  $text =~ s|\n| / |g;

  if ($what =~ /^#/ and length($text) > 60) {
    $text = substr($text, 0, 70);
    $text .= '...';
  } elsif (length($text) > 400) {
    $text = substr($text, 0, 400);
    $text .= '...';
  }

  $kernel->post('test', 'privmsg', $what, $text);
}

  

sub _stop {
  my ($kernel) = $_[KERNEL];

  print LOG "Control session stopped.\n";
  $kernel->post( 'test', 'quit', 'Neenios on ice!' );
  $kernel->alias_remove( 'smileyninja' );
}


sub irc_disconnected {
  my ($server) = $_[ARG0];
  print LOG "Lost connection to server $server.\n";
}


sub irc_error {
  my $err = $_[ARG0];
  print LOG "Server error occurred! $err\n";
}


sub irc_kick {
  my ($who, $where, $isitme, $reason) = @_[ARG0 .. ARG4];

  print LOG "Kicked from $where by $who: $reason\n" if $isitme eq $nick;
}


# here's where execution starts.

# Change this '0' to '1' for lots of debugging information.
POE::Component::IRC->new( 'test' ) or
  die "Can't instantiate new IRC component!\n";
POE::Session->new( 'main' => [qw(_start _stop _default irc_001 irc_kick
				 irc_disconnected irc_error irc_dcc_send)] );
$poe_kernel->run();

exit 0;


syntax highlighted by Code2HTML, v. 0.9