Perl HTML::TableExtract Table Finder Loop
This script parses a web page and prints the coordinates (depth and count) of every table it finds, followed by each table's rows. That makes it much easier to determine the location of the table you want to parse.
#!c:\perl\bin\perl.exe
use LWP::Simple;
use LWP::UserAgent;
use HTML::TableExtract;
use CGI;
use CGI::Carp qw(fatalsToBrowser);
# Script entry point: fetch the page once, then scan it for tables at each
# nesting depth from 0 through 10.

# NOTE(review): $cgi is never used in the visible code and no HTTP header is
# printed; if this runs under a web server, a Content-Type header is probably
# needed before any output -- confirm.
my $cgi = CGI->new;

my $url     = 'http://www.drudgereport.com/';
my $capture = get($url);

# LWP::Simple::get returns undef on failure; stop here rather than handing
# undef to the parser.
defined $capture
    or die "Could not fetch $url\n";

# $depth is read by DoLoop() to decide which nesting level to inspect.
my $depth = 0;
# $count is not read anywhere in the visible code; left as originally written.
$count = 0;

while ($depth <= 10) {
    DoLoop();
    $depth++;
}
# Print every table found in the fetched page at the current nesting depth.
#
# Takes no arguments. Reads two file-level variables: $depth (the table
# nesting depth to inspect) and $capture (the HTML text to parse). For each
# count from 0 through 10 it builds an HTML::TableExtract restricted to that
# depth/count pair, parses the page, and prints the coordinates of each
# matching table followed by its rows as comma-separated lines.
sub DoLoop {
    for my $i (0 .. 10) {
        my $te = HTML::TableExtract->new( depth => $depth, count => $i );
        $te->parse($capture);

        for my $ts ($te->tables) {
            print "Table found at ", join(',', $ts->coords), ":\n";

            for my $row ($ts->rows) {
                # Cells can presumably be undef (e.g. missing cells); print
                # those as empty strings, which is what join would emit anyway.
                print join(',', map { defined $_ ? $_ : '' } @$row), "\n";
            }
        }
    }
    return;
}


Leave a Reply