From 18f8ae712d45b7a0b9988494ce871772a2771e2d Mon Sep 17 00:00:00 2001 From: Banana Date: Sat, 26 Aug 2017 12:10:31 +0200 Subject: [PATCH] parsing the email content for new links. default add process implementet. no data written yet --- documentation/technicalinfo.txt | 5 ++ webroot/job/email-import.php | 107 ++++++++++++++++++++++++++++++ webroot/job/error.log | 28 ++++++++ webroot/lib/simple-imap.class.php | 7 +- webroot/lib/summoner.class.php | 27 ++++++++ 5 files changed, 171 insertions(+), 3 deletions(-) create mode 100644 documentation/technicalinfo.txt diff --git a/documentation/technicalinfo.txt b/documentation/technicalinfo.txt new file mode 100644 index 0000000..f43f020 --- /dev/null +++ b/documentation/technicalinfo.txt @@ -0,0 +1,5 @@ +table link +status = 1|2|3 +1 = private +2 = visible to everyone -> sql queries use this as default +3 = added via email importer. Moderation needed \ No newline at end of file diff --git a/webroot/job/email-import.php b/webroot/job/email-import.php index 1d2ed4f..f69b999 100644 --- a/webroot/job/email-import.php +++ b/webroot/job/email-import.php @@ -55,6 +55,13 @@ else { require('../config.php'); require('../lib/simple-imap.class.php'); +require('../lib/summoner.class.php'); +require('../lib/tag.class.php'); +require('../lib/category.class.php'); +require('../lib/link.class.php'); + +$DB = false; +$Summoner = new Summoner(); ## DB connection mysqli_report(MYSQLI_REPORT_ERROR | MYSQLI_REPORT_STRICT); # throw exeptions @@ -86,6 +93,106 @@ catch (Exception $e) { # process the emaildata and then move the emails var_dump($emaildata); +if(!empty($emaildata)) { + + $links = array(); + foreach($emaildata as $ed) { + $links = array_replace($links,$Summoner::extractEmailLinks($ed)); + } + + $newdata = array(); + if(!empty($links)) { + var_dump($links); + + + foreach($links as $linkstring) { + + # defaults + $newdata['link'] = $linkstring; + $newdata['description'] = ''; + $newdata['title'] = ''; + $newdata['image'] = ''; + $newdata['status'] = '3'; # moderation required + $search = ''; + $tagArr = array(); + $catArr = array(); + $hash = ''; + + if(strstr($linkstring, "|")) { + $_t = explode("|", $linkstring); + $newdata['link'] = $_t[0]; + + $catArr = Summoner::prepareTagOrCategoryStr($_t[1]); + if(isset($_t[2])) { + $tagArr = Summoner::prepareTagOrCategoryStr($_t[2]); + } + } + + $hash = md5($newdata['link']); + + $linkInfo = Summoner::gatherInfoFromURL($newdata['link']); + if(!empty($linkInfo)) { + if(isset($linkInfo['description'])) { + $newdata['description'] = $linkInfo['description']; + } + if(isset($linkInfo['title'])) { + $newdata['title'] = $linkInfo['title']; + } + if(isset($linkInfo['image'])) { + $newdata['image'] = $linkInfo['image']; + } + } + + $search = $newdata['title']; + $search .= ' '.$newdata['description']; + $search .= ' '.implode(" ",$tagArr); + $search .= ' '.implode(" ",$catArr); + + $queryStr = "INSERT IGNORE INTO `".DB_PREFIX."_link` SET + `link` = '".$DB->real_escape_string($newdata['link'])."', + `created` = NOW(), + `status` = '".$DB->real_escape_string($newdata['status'])."', + `description` = '".$DB->real_escape_string($newdata['description'])."', + `title` = '".$DB->real_escape_string($newdata['title'])."', + `image` = '".$DB->real_escape_string($newdata['image'])."', + `hash` = '".$DB->real_escape_string($hash)."', + `search` = '".$DB->real_escape_string($search)."'"; + var_dump($newdata); + var_dump($queryStr); + + /* + $DB->query($queryStr); + $linkID = $DB->insert_id; + + if(!empty($linkID)) { + + if(!empty($catArr)) { + foreach($catArr as $c) { + $catObj = new Category($DB); + $catObj->initbystring($c); + $catObj->setRelation($linkID); + + unset($catObj); + } + } + if(!empty($tagArr)) { + foreach($tagArr as $t) { + $tagObj = new Tag($DB); + $tagObj->initbystring($t); + $tagObj->setRelation($linkID); + + unset($tagObj); + } + } + } + */ + + + } + } + +} + # move them to the processed / archive folder #$EmailReader->move() diff --git a/webroot/job/error.log b/webroot/job/error.log index c4ba716..3fb5ecc 100644 --- a/webroot/job/error.log +++ b/webroot/job/error.log @@ -79,3 +79,31 @@ Stack trace: [21-Aug-2017 21:57:33 Europe/Berlin] Processed 2 messages [21-Aug-2017 21:58:00 Europe/Berlin] Read 19 messages [21-Aug-2017 21:58:00 Europe/Berlin] Processed 2 messages +[26-Aug-2017 10:32:37 Europe/Berlin] Read 21 messages +[26-Aug-2017 10:32:37 Europe/Berlin] Processed 3 messages +[26-Aug-2017 10:53:31 Europe/Berlin] Read 21 messages +[26-Aug-2017 10:53:31 Europe/Berlin] Processed 3 messages +[26-Aug-2017 10:53:31 Europe/Berlin] PHP Warning: preg_match(): Unknown modifier '\' in /home/banana/code/insipid/webroot/lib/summoner.class.php on line 410 +[26-Aug-2017 10:53:31 Europe/Berlin] PHP Warning: preg_match(): Unknown modifier '\' in /home/banana/code/insipid/webroot/lib/summoner.class.php on line 410 +[26-Aug-2017 10:53:31 Europe/Berlin] PHP Warning: preg_match(): Unknown modifier '\' in /home/banana/code/insipid/webroot/lib/summoner.class.php on line 410 +[26-Aug-2017 10:54:21 Europe/Berlin] Read 21 messages +[26-Aug-2017 10:54:21 Europe/Berlin] Processed 3 messages +[26-Aug-2017 10:54:21 Europe/Berlin] PHP Warning: preg_match(): Unknown modifier '{' in /home/banana/code/insipid/webroot/lib/summoner.class.php on line 410 +[26-Aug-2017 10:54:21 Europe/Berlin] PHP Warning: preg_match(): Unknown modifier '{' in /home/banana/code/insipid/webroot/lib/summoner.class.php on line 410 +[26-Aug-2017 10:54:21 Europe/Berlin] PHP Warning: preg_match(): Unknown modifier '{' in /home/banana/code/insipid/webroot/lib/summoner.class.php on line 410 +[26-Aug-2017 10:54:43 Europe/Berlin] Read 21 messages +[26-Aug-2017 10:54:43 Europe/Berlin] Processed 3 messages +[26-Aug-2017 10:55:07 Europe/Berlin] Read 21 messages +[26-Aug-2017 10:55:07 Europe/Berlin] Processed 3 messages +[26-Aug-2017 11:12:38 Europe/Berlin] Read 21 messages +[26-Aug-2017 11:12:38 Europe/Berlin] Processed 3 messages +[26-Aug-2017 11:17:11 Europe/Berlin] Read 21 messages +[26-Aug-2017 11:17:11 Europe/Berlin] Processed 3 messages +[26-Aug-2017 11:57:24 Europe/Berlin] Read 21 messages +[26-Aug-2017 11:57:24 Europe/Berlin] Processed 3 messages +[26-Aug-2017 12:06:18 Europe/Berlin] Read 21 messages +[26-Aug-2017 12:06:18 Europe/Berlin] Processed 3 messages +[26-Aug-2017 12:07:26 Europe/Berlin] Read 21 messages +[26-Aug-2017 12:07:26 Europe/Berlin] Processed 3 messages +[26-Aug-2017 12:08:47 Europe/Berlin] Read 21 messages +[26-Aug-2017 12:08:47 Europe/Berlin] Processed 3 messages diff --git a/webroot/lib/simple-imap.class.php b/webroot/lib/simple-imap.class.php index f2a21de..59e6551 100644 --- a/webroot/lib/simple-imap.class.php +++ b/webroot/lib/simple-imap.class.php @@ -76,8 +76,8 @@ class SimpleImap { if(!empty($subject)) { # check the special stuff - $markerextract = substr($subject, 0, strlen(EMAIL_MARKER)); - if($markerextract == EMAIL_MARKER) { + $markerextract = substr($subject, 0, strlen($subjectmarker)); + if($markerextract == $subjectmarker) { $processedmessagescount++; # valid message # get the body @@ -195,6 +195,7 @@ class SimpleImap { return $ret; } + /* // move the message to a new folder function move($msg_index, $folder='INBOX.Processed') { @@ -217,7 +218,7 @@ class SimpleImap { return $this->_inbox[0]; } - +*/ /** diff --git a/webroot/lib/summoner.class.php b/webroot/lib/summoner.class.php index 588168a..1d4a9f4 100644 --- a/webroot/lib/summoner.class.php +++ b/webroot/lib/summoner.class.php @@ -391,6 +391,33 @@ class Summoner { exit; } } + + /** + * extract from given string (was email body) any links we want to add + * should be in the right format + * return an array with links and the infos about them + * + * new-absolute-link|multiple,category,strings|multiple,tag,strings\n + * + * @param string $string + * @return array $ret + */ + static function extractEmailLinks($string) { + $ret = array(); + + #this matches a valid URL. An URL with | is still valid... + $urlpattern = '#(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))#'; + + preg_match_all($urlpattern, $string, $matches); + if(isset($matches[0]) && !empty($matches[0])) { + foreach($matches[0] as $match) { + $ret[md5($match)] = $match; + } + } + + + return $ret; + } } ?> -- 2.39.5