+Avoid downloading too much data. Check content before downloading?
Set correct timezone. Maybe in config?
Some sort of matching against spam domain list?
A web view for the results?
$query = $dbh->prepare($queryStr);
foreach my $invalidId (@invalidUrls) {
$query->execute($invalidId);
- $query->finish();
+ #$query->finish();
sayLog "Removed $invalidId from unique_domain" if $DEBUG;
}
sayGreen "Invalid unique_domain removed: ".scalar @invalidUrls;
my $baseUrl = $row[1];
push(@toBeDeletedFromFetchAgain, $baseUrl);
}
-$query->finish();
+#$query->finish();
sayYellow "Remove baseurls from url_to_fetch: ".scalar @toBeDeletedFromFetchAgain;
$queryStr = "DELETE FROM url_to_fetch WHERE `baseurl` = ?";
$query = $dbh->prepare($queryStr);
foreach my $baseUrl (@toBeDeletedFromFetchAgain) {
$query->execute($baseUrl);
- $query->finish();
+ #$query->finish();
sayLog "Removed $baseUrl from url_to_fetch" if $DEBUG;
}
sayGreen "Removed baseurls from url_to_fetch: ".scalar @toBeDeletedFromFetchAgain;
while(my @row = $query->fetchrow_array) {
$urlsToFetch{$row[0]} = $row[1];
}
-$query->finish();
+#$query->finish();
# successful fetches
my @urlsFetched;
$query->bind_param(1,$idToUpdate);
$query->execute();
}
- $query->finish();
+ #$query->finish();
sayGreen "Update fetch timestamps done";
}
$query->bind_param(1,$idToUpdate);
$query->execute();
}
- $query->finish();
+ #$query->finish();
sayGreen "Update fetch failed done";
}
while(my @row = $query->fetchrow_array) {
$baseUrls{$row[0]} = $row[1];
}
-$query->finish();
+#$query->finish();
# get the string to ignore
while(my @row = $query->fetchrow) {
push(@urlStringsToIgnore, $row[0])
}
-$query->finish();
+#$query->finish();
## prepare linkExtor
#sayLog "Inserted: $link" if($DEBUG);
}
- $query->finish();
+ #$query->finish();
}