From ab73ddbc2e94e1127946e0b43683c441e03b9346 Mon Sep 17 00:00:00 2001 From: Banana Date: Sun, 13 Oct 2024 11:33:33 +0200 Subject: [PATCH] introduced subroutine to add info to the stats table Signed-off-by: Banana --- crawler/fetch.pl | 25 ++++++++----------------- crawler/lib/Aranea/Common.pm | 22 +++++++++++++++++++++- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/crawler/fetch.pl b/crawler/fetch.pl index 3c32b9d..583d38b 100644 --- a/crawler/fetch.pl +++ b/crawler/fetch.pl @@ -23,7 +23,7 @@ use Data::Dumper; use Term::ANSIColor qw(:constants); use lib './lib'; -use Aranea::Common qw(sayLog sayYellow sayGreen sayRed); +use Aranea::Common qw(sayLog sayYellow sayGreen sayRed addToStats); use open qw( :std :encoding(UTF-8) ); use DBI; @@ -101,6 +101,7 @@ while ( my ($id, $url) = each %urlsToFetch ) { next; } open(my $fh, '>:encoding(UTF-8)', "storage/$id.result") or die "Could not open file 'storage/$id.result' $!"; + print $fh $url."\n"; # to know where it comes from print $fh $res->decoded_content(); close($fh); push(@urlsFetched, $id); @@ -129,23 +130,12 @@ updateFetched($dbh, @urlsFetched); updateFailed($dbh, @urlsFailed); # some stats stuff -my $queryStr = "INSERT INTO `stats` SET `action` = 'fetch', `value` = NOW() - ON DUPLICATE KEY UPDATE `value` = NOW()"; -$query = $dbh->prepare($queryStr); -$query->execute(); - -$queryStr = "INSERT INTO `stats` SET `action` = 'fetchfailed', `value` = '".$allFailed."' - ON DUPLICATE KEY UPDATE `value` = '".$allFailed."'"; -$query = $dbh->prepare($queryStr); -$query->execute(); - -$queryStr = "INSERT INTO `stats` SET `action` = 'fetchsuccess', `value` = '$allFetched' - ON DUPLICATE KEY UPDATE `value` = '$allFetched'"; -$query = $dbh->prepare($queryStr); -$query->execute(); - +addToStats($dbh, 'fetch'); +addToStats($dbh, 'fetchfailed', $allFailed, $allFailed); +addToStats($dbh, 'fetchsuccess', $allFetched, $allFetched); $dbh->commit(); + # end $dbh->disconnect(); sayGreen "Fetch complete"; @@ -155,6 +145,7 
@@ sayGreen "Fetch complete"; ## update last_fetched in the table sub updateFetched { my ($dbh, @urls) = @_; + sayYellow "Update fetch timestamps: ".scalar @urls; $query = $dbh->prepare("UPDATE `url_to_fetch` SET `last_fetched` = NOW() WHERE `id` = ?"); foreach my $idToUpdate (@urls) { @@ -171,7 +162,7 @@ sub updateFailed { my ($dbh, @urls) = @_; sayYellow "Update fetch failed: ".scalar @urls; - $query = $dbh->prepare("UPDATE `url_to_fetch` SET `fetch_failed` = 1 WHERE `id` = ?"); + $query = $dbh->prepare("UPDATE `url_to_fetch` SET `fetch_failed` = 1, `last_fetched` = NOW() WHERE `id` = ?"); foreach my $idToUpdate (@urls) { sayLog "Update fetch failed for: $idToUpdate" if($DEBUG); $query->bind_param(1,$idToUpdate); diff --git a/crawler/lib/Aranea/Common.pm b/crawler/lib/Aranea/Common.pm index b832ffb..86cb03d 100644 --- a/crawler/lib/Aranea/Common.pm +++ b/crawler/lib/Aranea/Common.pm @@ -24,7 +24,7 @@ use DateTime; use Exporter qw(import); -our @EXPORT_OK = qw(sayLog sayYellow sayGreen sayRed); +our @EXPORT_OK = qw(sayLog sayYellow sayGreen sayRed addToStats); sub sayLog { my ($string) = @_; @@ -50,4 +50,24 @@ sub sayRed { say BOLD, RED, "[".$dt->datetime."] ".$string, RESET; } +## subroutine to add something to the stats table +## if $value or $onDuplicateValue is undef (bound as SQL NULL), NOW() is used instead. This is done with the MySQL COALESCE() function; an empty string is not NULL and is stored as-is +sub addToStats { + my ($dbh, $action, $value, $onDuplicateValue) = @_; + + if(!defined $action || $action eq "") { + return; + } + + my $queryStr = "INSERT INTO `stats` SET `action` = ?, `value` = COALESCE(?, NOW())"; + $queryStr .= " ON DUPLICATE KEY UPDATE `value` = COALESCE(?, NOW())"; + my $query = $dbh->prepare($queryStr); + + $query->bind_param(1,$action); + $query->bind_param(2,$value); + $query->bind_param(3,$onDuplicateValue); + + $query->execute(); +} + 1; -- 2.39.5