91.132.146.200 Git - insipid.git/commitdiff
link checker
author: Banana <banana@starscream.de>
Fri, 17 Feb 2012 13:08:53 +0000 (14:08 +0100)
committer: Banana <banana@starscream.de>
Fri, 17 Feb 2012 13:08:53 +0000 (14:08 +0100)
lib/Insipid/Stats.pm
tools/linkcheck.pl

index be6ba95ba34de0c1c3a95d5a9a1a16bb7918074c..d151bf0eb38d0a43d686a545d63ab5adbd2e2bd0 100644 (file)
@@ -51,35 +51,48 @@ sub groupByDomain {
        my $url = shift;
 
        my ($sql, $sth, @row);
-       my %domains = ();
+       my %domainGroup = ();
+       my @invalidDomains;
 
        if($url) {
 
        }
 
-       $sql = "SELECT `url` FROM `$tbl_bookmarks` ORDER BY `url`";
+       # keep order otherwise the while will not work
+       $sql = "SELECT `id`, `url`,
+                                       `linkcheck_status`,
+                                       `linkcheck_date`
+                       FROM `$tbl_bookmarks` ORDER BY `url`";
        $sth = $dbh->prepare($sql);
        $sth->execute;
 
        if($sth->rows ne 0) {
+               print "<h3> Invalid URLs after last linkcheck</h3>";
+               print "<ul>";
                while(@row = $sth->fetchrow_array()) {
-                       my $uri = URI->new($row['url']);
+                       my $uri = URI->new($row[1]);
 
-                       if($domains{$uri->host}) {
-                               $domains{$uri->host}++;
+                       if($row[2] eq 0) {
+                               print "<li><a href='$site_url/insipid.cgi?op=edit_bookmark&id=$row[0]'>".$row[1]."</a></li>";
+                       }
+
+                       if($domainGroup{$uri->host}) {
+                               $domainGroup{$uri->host}++;
                        } else {
-                               $domains{$uri->host} = 1;
+                               $domainGroup{$uri->host} = 1;
                        }
                }
+               print "</ul>";
 
-               if(%domains) {
+               if(%domainGroup) {
 
+                       print "<h3>Bookmarks grouped by domain</h3>";
                        print "<table cellpadding='2' cellspacing='0'>";
                        print "<tr><th>Domain</th><th>Count</th></tr>";
                        #for(sort keys %domains) {
-                       foreach (reverse sort { $domains{$a} <=> $domains{$b} } keys %domains ) {
+                       foreach (reverse sort { $domainGroup{$a} <=> $domainGroup{$b} } keys %domainGroup ) {
 
-                               print "<tr><td><a href='$site_url/insipid.cgi?bydomain=".uri_escape($_)."'>$_</a></td><td>$domains{$_}</td></tr>";
+                               print "<tr><td><a href='$site_url/insipid.cgi?bydomain=".uri_escape($_)."'>$_</a></td><td>$domainGroup{$_}</td></tr>";
                        }
                        print "</table>";
                }
index bb42cd0f9bbc257c7df57d7610e92c33eff7f6d7..8cb9614bb4c0903d7d1d07acacc9dac5f34c8452 100755 (executable)
@@ -22,6 +22,7 @@ use warnings;
 use strict;
 use Getopt::Long;
 use DBI;
+use LWP::UserAgent;
 
 BEGIN {
     binmode STDOUT, ':encoding(UTF-8)';
@@ -36,15 +37,17 @@ use Insipid::Bookmarks;
 
 $|=1;
 
-my $opt_help = 0;
+my $opt_help;
 my $opt_link = "all";
+my $opt_proxy;
 
 # if no arguments passed
 &usage if @ARGV < 1;
 
 GetOptions(
-       "help|h"                        => \$opt_help,
-       "link|l"                        => \$opt_link
+       "help|h"                => \$opt_help,
+       "link=s"                => \$opt_link,
+       "proxy=s"               => \$opt_proxy
 ) or die(&usage);
 
 &usage if $opt_help;
@@ -53,11 +56,43 @@ GetOptions(
 #
 # main
 #
-my $query = "SELECT `url` FROM `$tbl_bookmarks`";
-$query .= " WHERE `linkcheck_status` = " if($opt_link == 1);
-$query .= " WHERE `linkcheck_status` = " if($opt_link == 0);
-
-print $query;
+my $query = "SELECT `id`, `url` FROM `$tbl_bookmarks`";
+$query .= " WHERE `linkcheck_status` = 1" if($opt_link eq "active");
+$query .= " WHERE `linkcheck_status` = 0" if($opt_link eq "inactive");
+
+my $sth = $dbh->prepare($query);
+$sth->execute;
+if($sth->rows ne 0) {
+       my $ua = LWP::UserAgent->new;
+       $ua->timeout(5);
+       $ua->show_progress(1);
+       $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11");
+       $ua->proxy(['http'], $opt_proxy) if $opt_proxy;
+
+       $query = "UPDATE `$tbl_bookmarks`
+                               SET `linkcheck_status` = ?,
+                               `linkcheck_date` = ?
+                       WHERE `id` = ?";
+       my $sthupdate = $dbh->prepare($query);
+
+       while (my $hr = $sth->fetchrow_hashref) {
+               print $hr->{url}." ";
+
+               my $response = $ua->head($hr->{url});
+               my $status = 0;
+
+               if ($response->is_success) {
+                       print "Ok !\n";
+                       $status = 1;
+               }
+               else {
+                       print $response->status_line."\n";
+               }
+
+               $sthupdate->execute($status,time(),$hr->{id});
+
+       }
+}
 
 
 #
@@ -72,9 +107,10 @@ code. If so set the checkDate and result. Non 200 checks will be marked. Those
 can be checked seperately
 
        -h, --help              display this help message
-       -l, --link              all = check all links
+       --link=                 all = check all links
                                active = check only those which are not marked as inactive
                                inactive = check inactive only
+       --proxy=        proxy address if needed
 
 EOT
 ;