Perl HTML::TableExtract Table Finder Loop

This script fetches a web page, parses it, and prints the coordinates (depth and count) of every table it finds, followed by that table's rows. This makes it much easier to determine the location of the table you actually want to parse.

#!c:\perl\bin\perl.exe

use LWP::Simple;
use LWP::UserAgent;
use HTML::TableExtract;

use CGI;
use CGI::Carp qw(fatalsToBrowser);
# Query object for the CGI environment. It is not referenced again in the
# visible script; it is kept so the script's setup is unchanged.
my $cgi = CGI->new;

# Page whose tables are to be located.
my $url = "http://www.drudgereport.com/";

# LWP::Simple::get() returns undef when the fetch fails; stop here with a
# clear message instead of handing undef to the parser.
my $capture = get($url);
die "Could not fetch $url\n" unless defined $capture;

# Package globals (deliberately no "my"): DoLoop() reads $depth directly.
$depth = 0;
$count = 0;    # not read anywhere in the visible script

# Probe nesting depths 0 through 10. Each DoLoop() call scans the table
# counts at the current $depth.
while ($depth <= 10) {
    DoLoop();
    $depth++;
}

# DoLoop
#
# For the current value of the package global $depth, tries table counts
# 0 through 10. For each (depth, count) pair it parses the already-fetched
# page held in $capture and, for every table that matches, prints a
# "Table found at <coords>:" header followed by one comma-joined line per row.
#
# Takes no arguments; reads $depth and $capture from the enclosing file.
# Returns nothing useful; its only effect is the output written to STDOUT.
sub DoLoop {
    for my $i (0 .. 10) {
        # A fresh extractor per pair: the depth/count constraints are fixed
        # at construction time.
        my $te = HTML::TableExtract->new( depth => $depth, count => $i );
        $te->parse($capture);

        foreach my $ts ($te->tables) {
            print "Table found at ", join(',', $ts->coords), ":\n";

            foreach my $row ($ts->rows) {
                # Cells may be undef (presumably for spanned/empty cells --
                # see the HTML::TableExtract docs); print those as empty
                # strings so the output is the same with warnings enabled.
                print join(',', map { defined $_ ? $_ : '' } @$row), "\n";
            }
        }
    }

    return;
}

About the Author

Greg

Leave a Reply

You can use these XHTML tags: <a href="" title=""> <abbr title=""> <acronym title=""> <blockquote cite=""> <code> <em> <strong>


Warning: stristr() [function.stristr]: Empty delimiter in /home/thegard5/public_html/gregjessup/wp-content/plugins/wassup/wassup.php on line 2093