Below is a Perl script I used to pull all the URLs and email addresses out of the tcpflow results generated from the network traffic of a Storm-infected bot. The script can be run as follows:
perl stormextraction -dir /data/tcpflowresults/
Here is the script:
#!/usr/bin/perl
# simple little hack to pull URLs out of tcpflow results from captured storm data
# JD Durick <jd@labgeek.net>
# runs on a directory after you have run:
#   tcpflow -r <storm pcap file>
# The resulting files mainly contain email header information:
# email addresses, subjects, and html links that you are asked to visit.
# version 0.1
# format of data: (really can be anything with a URL in the file)
#----------------
#To: <sms5672@daum.net>
#Subject: Holidays are near, but u know how not to give hangover a chance
#Date: Sat, 19 Apr 2008 12:13:18 -0400
#MIME-Version: 1.0
#Content-Type: text/plain;
# format=flowed;
# charset="windows-1250";
# reply-type=original
#Content-Transfer-Encoding: 7bit
#X-Priority: 3
#X-MSMail-Priority: Normal
#X-Mailer: Microsoft Outlook Express 5.50.4133.2499
#X-MimeOLE: Produced By Microsoft MimeOLE V5.50.4133.2499
#
#Make your housewife happy with our original blue colored-tab!is http://starfoxguide.com
#TODO
# parse even more with URI to get just unique hostnames, something like $url->host()
# DNS resolver for each of URLS
# email domain breakdown
use strict;
use warnings;
use Digest::MD5 qw(md5 md5_hex md5_base64); # for later
use File::Spec;
use Getopt::Long;
use MIME::Parser; # for later
use URI::Find;
# ---------------------------------------------------------------------------
# Main script.
#
# Usage: perl stormextraction -dir <directory of tcpflow result files>
#
# Every regular file in the directory that is smaller than $max_file_size
# bytes is scanned line by line for URLs, and is then handed to getEmail()
# so the e-mail addresses it contains are collected as well. The unique URLs
# are written, sorted, to httpfile.txt in the current working directory.
# Nothing happens when -dir is missing or empty.
# ---------------------------------------------------------------------------

# Command-line options, filled in by GetOptions below.
my $dir    = "";    # -dir: directory containing the tcpflow result files
my $output = "";    # -output: accepted, but not used anywhere in this script yet

# Both hashes are used as sets: the keys are the unique URLs / addresses.
# %emails is filled in by getEmail().
my ( %url, %emails );

# Number of files looked at (only referenced by the debug print below).
my $counter = 0;

# Files of this size (in bytes) or larger are skipped entirely.
my $max_file_size = 90000;

GetOptions(
    "dir:s"    => \$dir,
    "output:s" => \$output
);

# get all the http:// urls that are found in all the emails sent out.
if ($dir) {
    opendir( my $dh, $dir ) or die("Cannot open directory $dir: $!\n");
    my @dir_contents = readdir($dh);
    closedir($dh);

    # One finder is enough for the whole run. The callback records the URL
    # exactly as it appeared in the text; its return value is what URI::Find
    # substitutes back into the scanned line, so return the text unchanged.
    my $finder = URI::Find->new(
        sub {
            my ( undef, $orig_uri ) = @_;
            $url{$orig_uri} = 1;
            return $orig_uri;
        }
    );

  ENTRY:
    foreach my $file (@dir_contents) {
        next ENTRY if $file eq "." || $file eq "..";

        # catfile inserts the path separator when -dir was given without a
        # trailing slash.
        my $fullpathname = File::Spec->catfile( $dir, $file );

        # Sub-directories and other non-files contain nothing to scan.
        next ENTRY unless -f $fullpathname;
        $counter++;

        my $fsize = ( stat($fullpathname) )[7];
        if ( !defined $fsize ) {
            warn "Cannot stat $fullpathname: $!\n";
            next ENTRY;
        }

        #print "[$counter]: Processing $fullpathname and size = $fsize\n";

        # Check the size before opening, so that skipping a large file
        # never leaves a handle open.
        next ENTRY if $fsize >= $max_file_size;

        my $fh;
        if ( !open( $fh, '<', $fullpathname ) ) {
            warn "Cannot open $fullpathname: $!\n";
            next ENTRY;
        }
        while ( my $line = <$fh> ) {
            $finder->find( \$line );
        }
        close($fh);

        # lets get a list of all those email addresses we see
        getEmail($fullpathname);
    }

    my $url_report = "httpfile.txt";
    open( my $out, '>', $url_report )
      or die("Cannot write $url_report: $!\n");
    foreach my $u ( sort keys %url ) {

        # lets get rid of those http://
        # $u =~ s/http\:\/\///g;
        # $u =~ s/https\:\/\///g;
        print {$out} "$u\n";
    }

    # Buffered write errors only surface at close, so check it.
    close($out) or die("Cannot finish writing $url_report: $!\n");
}
# getEmail($filename)
#
# Scans $filename line by line for e-mail addresses and records every address
# found as a key of the file-level %emails hash (used as a set, so duplicates
# collapse). Afterwards email.txt in the current working directory is
# rewritten with all addresses collected so far, one per line, sorted.
#
# Parameters:
#   $filename - path of the file to scan.
#
# Returns nothing. An unreadable input file or an unwritable email.txt is
# reported with a warning instead of aborting the run.
sub getEmail {
    my $filename = shift;

    if ( open( my $in, '<', $filename ) ) {
        while ( my $line = <$in> ) {
            next if ( $line =~ /^\s*$/ );

            # /g picks up every address on the line, not just the first one.
            # The local part may contain '.' and '-', and the domain may
            # contain '-', so such addresses are neither truncated nor
            # dropped. The top-level domain is limited to 2-4 letters.
            while ( $line =~
                /\b([A-Za-z0-9._%+-]+@[A-Za-z0-9._-]+\.[A-Za-z]{2,4})\b/g )
            {
                $emails{$1} = 1;
            }
        }
        close($in);
    }
    else {
        warn "Cannot open $filename: $!\n";
    }

    # The report is rewritten on every call, so it is complete after the
    # last file has been processed.
    my $email_report = "email.txt";
    my $out;
    if ( !open( $out, '>', $email_report ) ) {
        warn "Cannot write $email_report: $!\n";
        return;
    }
    foreach my $email ( sort keys %emails ) {
        print {$out} "$email\n";
    }

    # Close the handle that was actually written to.
    close($out) or warn "Cannot finish writing $email_report: $!\n";
    return;
}
__END__