From b1db14cbc025e3ed6ed773f9272d010f79b0026f Mon Sep 17 00:00:00 2001
From: Banana
Date: Wed, 16 Oct 2024 08:12:26 +0200
Subject: [PATCH] adding big results also to the failed ones

Signed-off-by: Banana
---
 crawler/fetch.pl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/crawler/fetch.pl b/crawler/fetch.pl
index 66cc1ab..e418e24 100644
--- a/crawler/fetch.pl
+++ b/crawler/fetch.pl
@@ -90,14 +90,15 @@ while ( my ($id, $url) = each %urlsToFetch ) {
 	if ($res->is_success) {
 		# callback tells us to stop
 		if($res->header('Client-Aborted')) {
-			sayYellow "Aborted, too big.";
+			push(@urlsFailed, $id);
 			$allFailed++;
+			sayYellow "Aborted, too big.";
 			next;
 		}
 		if(index($res->content_type, "text/html") == -1) {
-			sayYellow "Fetching: $id ignored. Not html";
 			push(@urlsFailed, $id);
 			$allFailed++;
+			sayYellow "Fetching: $id ignored. Not html";
 			next;
 		}
 		open(my $fh, '>:encoding(UTF-8)', "storage/$id.result") or die "Could not open file 'storage/$id.result' $!";
@@ -108,9 +109,9 @@ while ( my ($id, $url) = each %urlsToFetch ) {
 		sayGreen"Fetching: $id ok";
 	}
 	else {
-		sayRed "Fetching: $id failed: $res->code ".$res->status_line;
 		push(@urlsFailed, $id);
 		$allFailed++;
+		sayRed "Fetching: $id failed: $res->code ".$res->status_line;
 	}
 
 	if($counter >= $config->get("FETCH_URLS_PER_PACKAGE")) {
-- 
2.39.5