]> 91.132.146.200 Git - insipid.git/commitdiff
Fixed snapshot parsing and viewing
author Luke Reeves <luke@neuro-tech.net>
Sun, 12 Jul 2009 15:48:09 +0000 (11:48 -0400)
committer Luke Reeves <luke@neuro-tech.net>
Sun, 12 Jul 2009 15:48:09 +0000 (11:48 -0400)
lib/Insipid/Parser.pm [new file with mode: 0755]
lib/Insipid/Snapshots.pm

diff --git a/lib/Insipid/Parser.pm b/lib/Insipid/Parser.pm
new file mode 100755 (executable)
index 0000000..d9a41a3
--- /dev/null
@@ -0,0 +1,115 @@
+package Insipid::Parser;\r
+\r
+use HTML::Parser;\r
+use HTML::Entities ();\r
+use URI::URL;\r
+use Digest::MD5 qw(md5 md5_hex);\r
+use Insipid::Config;\r
+use Insipid::Database;\r
+\r
+use vars qw(@ISA);\r
+@ISA = qw(HTML::Parser);\r
+\r
+# Remember the snapshot map on the parser object (stored under the SSMAP\r
+# key). start() consults it when re-printing the href of anchor tags.\r
+sub setSnapshotMap {\r
+       my $self = shift;\r
+       $self->{SSMAP} = shift;\r
+}\r
+\r
+# Constructor. Builds the parent-class parser object, then stores the first\r
+# two arguments after the class name in the __base and __grabit slots\r
+# (start() resolves URLs against __base and treats __grabit as a callback;\r
+# an undefined __grabit makes the handlers print instead).\r
+sub new {\r
+       my ($class, @args) = @_;\r
+       my $self = $class->SUPER::new;\r
+       @{$self}{qw(__base __grabit)} = @args;\r
+       return $self;\r
+}\r
+\r
+# Declaration handler: accepts the declaration text and deliberately does\r
+# nothing with it (nothing is printed or stored).\r
+sub declaration {\r
+       my ($self, @rest) = @_;\r
+       my ($decl) = @rest;\r
+}\r
+\r
+# Start-tag handler.\r
+#\r
+# Arguments: $tag (tag name), $attr (hash ref of attribute name => value),\r
+# $attrseq and $origtext (accepted to match the callback signature, unused).\r
+#\r
+# Behaviour depends on the __grabit slot set by new():\r
+#   * undefined: the tag is re-printed; link/img URLs ending in .gif, .jpg,\r
+#     .png or .css are replaced by $snapshot_url plus the MD5 of the\r
+#     absolute URL, and anchor hrefs found in the snapshot map are replaced\r
+#     by $snapshot_url plus the mapped value.\r
+#   * defined: nothing is printed; each such link/img URL is recorded with\r
+#     join_urls() and passed to the __grabit callback with its extension.\r
+sub start {\r
+       my $self = shift;\r
+       my ($tag, $attr, $attrseq, $origtext) = @_;\r
+       my $viewing = !defined($self->{__grabit});\r
+\r
+       print("<$tag") if $viewing;\r
+\r
+       for (keys %$attr) {\r
+               my $val = $attr->{$_};\r
+\r
+               # Skip a "/" attribute whose value is "/" (presumably what the\r
+               # parser reports for the slash in a self-closing tag - confirm).\r
+               # Must be a comparison: the previous "=" assigned to $val and\r
+               # was always true.\r
+               if(($_ eq "/") && ($val eq "/")) { next; }\r
+\r
+               print(" $_=\"") if $viewing;\r
+\r
+               if("$tag $_" =~ /^(link href|img src)$/i) {\r
+                       $val = url($val)->abs($self->{__base},1);\r
+\r
+                       # JPG, GIF, PNG and CSS\r
+                       if($val =~ /(\.gif|\.jpg|\.png|\.css)$/i) {\r
+                               # Copy the capture before calling any other code.\r
+                               my $ext = $1;\r
+\r
+                               if($viewing) {\r
+                                       $val = $snapshot_url . md5_hex("$val");\r
+                               } else {\r
+                                       join_urls($self->{__base}, $val);\r
+                                       $val = $self->{__grabit}($val, $ext);\r
+                               }\r
+                       }\r
+               }\r
+\r
+               next unless $viewing;\r
+\r
+               # Check against our snapshot map\r
+               if(($tag =~ /^a/i) && ($_ =~ /^href/i)) {\r
+                       my $sst = $self->{SSMAP};\r
+\r
+                       if(defined($sst->{$val})) {\r
+                               $val = $snapshot_url . $sst->{$val};\r
+                       }\r
+               }\r
+\r
+               # Attribute values arrive entity-decoded from the parser, so\r
+               # re-encode the characters that would otherwise break out of the\r
+               # double-quoted attribute or form a bogus entity.\r
+               print(HTML::Entities::encode_entities("$val", '<>&"') . '"');\r
+       }\r
+\r
+       print(">") if $viewing;\r
+}\r
+\r
+# End-tag handler: prints the closing tag unless a __grabit callback is set.\r
+sub end {\r
+       my ($self, @rest) = @_;\r
+       my ($tag) = @rest;\r
+\r
+       print("</$tag>") if not defined $self->{__grabit};\r
+}\r
+\r
+# Text handler: prints the text chunk as received unless a __grabit\r
+# callback is set.\r
+sub text {\r
+       my ($self, @rest) = @_;\r
+       my ($text) = @rest;\r
+\r
+       print("$text") if not defined $self->{__grabit};\r
+}\r
+\r
+# Comment handler: prints the comment wrapped in "<!-- " and " -->" unless\r
+# a __grabit callback is set.\r
+sub comment {\r
+       my ($self, @rest) = @_;\r
+       my ($comment) = @rest;\r
+\r
+       print("<!-- $comment -->") if not defined $self->{__grabit};\r
+}\r
+\r
+# Record a parent -> child reference: inserts the MD5 hex digests of the\r
+# two given strings into $tbl_pagecache_references using bound parameters.\r
+# Statement errors are checked but intentionally not acted upon.\r
+sub join_urls {\r
+       my $parent = shift;\r
+       my $child = shift;\r
+\r
+       my $sth = $dbh->prepare(\r
+               "insert into $tbl_pagecache_references(md5_parent, md5_child) values(?, ?)"\r
+       );\r
+       $sth->execute(md5_hex($parent), md5_hex($child));\r
+\r
+       if($sth->err) {\r
+               # ignore errors for now\r
+       }\r
+}\r
+\r
+1;\r
+__END__\r
index 3e2f56732eb9593448ed0ce459feb35ea545c073..9b802b4f09a6f574fcae22a37dd48cc9ee06fe70 100755 (executable)
@@ -28,6 +28,7 @@ use Insipid::Config;
 use Insipid::Database;\r
 use Insipid::Util;\r
 use Insipid::LinkExtractor;\r
+use Insipid::Parser;\r
 use CGI qw/:standard/;\r
 use CGI::Carp qw(fatalsToBrowser);\r
 use Date::Format;\r
@@ -228,7 +229,7 @@ sub show_snapshot {
                }\r
 \r
                print "\r\n";\r
-               my $p = MyParser->new($row[2], undef);\r
+               my $p = Insipid::Parser->new($row[2], undef);\r
                $p->setSnapshotMap(\%internalLinks);\r
                \r
                if($row[0] =~ /utf/i) {\r
@@ -476,7 +477,7 @@ sub do_snapshot {
 sub parsepage {\r
        my ($url, $content, $content_type) = (@_);\r
 \r
-       my $p = MyParser->new($url, \&fetch_url);\r
+       my $p = Insipid::Parser->new($url, \&fetch_url);\r
        if($content_type =~ /utf/i) { \r
                $p->utf8_mode(1);\r
        }\r
@@ -484,122 +485,5 @@ sub parsepage {
        $p->parse($content);\r
 }\r
 \r
-## "use MyParser;" ## TODO: Make this a separate file.\r
-BEGIN {\r
-       package MyParser;\r
-       use HTML::Parser;\r
-       use HTML::Entities ();\r
-       use URI::URL;\r
-       use Digest::MD5 qw(md5 md5_hex);\r
-       use Insipid::Config;\r
-       use Insipid::Database;\r
-\r
-       use vars qw(@ISA);\r
-       @ISA = qw(HTML::Parser);\r
-\r
-       sub setSnapshotMap {\r
-               my($self, $ssMap) = (@_);\r
-               $self->{SSMAP} = $ssMap;\r
-       }\r
-\r
-       sub new {\r
-               my $pack = shift;\r
-               my $self = $pack->SUPER::new;\r
-               @{$self}{qw(__base __grabit)} = @_;\r
-               $self;\r
-       }\r
-\r
-       sub declaration {\r
-               my $self = shift;\r
-               my ($decl) = @_;\r
-       }\r
-\r
-       sub start {\r
-               my $self = shift;\r
-               my ($tag, $attr, $attrseq, $origtext) = @_;\r
-\r
-               if(!defined($self->{__grabit})) {\r
-                       print("<$tag");\r
-               }\r
-\r
-               for (keys %$attr) {\r
-                       my $val = $attr->{$_};\r
-                       if(($_ eq "/") && ($val = "/")) { next; }\r
-\r
-                       if(!defined($self->{__grabit})) { \r
-                               print(" $_=\""); \r
-                       }\r
-\r
-                       if( "$tag $_" =~ /^(link href|img src)$/i) {\r
-                               $val = url($val)->abs($self->{__base},1);\r
-       \r
-                               if(!defined($self->{__grabit})) {\r
-                                       if($val =~ /(\.gif|\.jpg|\.png|\.css)$/i) {\r
-                                               my $md5 = md5_hex("$val");\r
-                                               $val = $snapshot_url . $md5;\r
-                                       }\r
-                               } else {\r
-                                       # JPG, GIF, PNG and CSS\r
-                                       if($val =~ /(\.gif|\.jpg|\.png|\.css)$/i) {\r
-                                               join_urls($self->{__base}, $val);\r
-                                               $val = $self->{__grabit}($val, $1);\r
-                                       }\r
-                               }\r
-                       }\r
-\r
-                       if(!defined($self->{__grabit})) {\r
-                               # Check against our snapshot map\r
-                               if(($tag =~ /^a/i) && ($_ =~ /^href/i)) {\r
-                                       my $sst = $self->{SSMAP};\r
-\r
-                                       if(defined($sst->{$val})) {\r
-                                               print $snapshot_url . $sst->{$val};\r
-                                               print('"');\r
-                                       } else {\r
-                                               print("$val\"");\r
-                                       }\r
-                               } else {\r
-                                       print("$val\"");\r
-                               }\r
-                       }\r
-               }\r
-\r
-               if(!defined($self->{__grabit})) { print(">"); }\r
-       }\r
-\r
-       sub end {\r
-               my $self = shift;\r
-               my ($tag) = @_;\r
-\r
-               if(!defined($self->{__grabit})) { print("</$tag>"); }\r
-       }\r
-\r
-       sub text {\r
-               my $self = shift;\r
-               my ($text) = @_;\r
-\r
-               if(!defined($self->{__grabit})) { print("$text"); }\r
-       }\r
-\r
-       sub comment {\r
-               my $self = shift;\r
-               my ($comment) = @_;\r
-\r
-               if(!defined($self->{__grabit})) { print("<!-- $comment -->"); }\r
-       }\r
-       \r
-       sub join_urls {\r
-               my($parent, $child) = (@_);\r
-               my $sql = "insert into $tbl_pagecache_references(md5_parent, md5_child) values(?, ?)";\r
-               my $sth = $dbh->prepare($sql);\r
-               $sth->execute(md5_hex($parent), md5_hex($child));\r
-               if($sth->err) {\r
-                       # ignore errors for now\r
-               }\r
-       }\r
-\r
-}\r
-## end "use MyParser;" ##\r
-\r
 1;\r
 __END__\r