From 75932319e59f704155fda65ba77ec70a5e9670ad Mon Sep 17 00:00:00 2001 From: Banana Date: Mon, 21 Aug 2017 21:59:07 +0200 Subject: [PATCH] reading emails and extrating its contents. simple and plain only yet. --- documentation/email-importer.txt | 7 +- webroot/job/email-import.php | 11 ++- webroot/job/error.log | 8 +++ webroot/lib/simple-imap.class.php | 110 +++++++++++++++++++++++------- 4 files changed, 108 insertions(+), 28 deletions(-) diff --git a/documentation/email-importer.txt b/documentation/email-importer.txt index 6b813e9..167661a 100644 --- a/documentation/email-importer.txt +++ b/documentation/email-importer.txt @@ -8,4 +8,9 @@ Set the config variables in the config file. Make sure you an individual marker string! There is no "security" within this method. Only the special string you can define. The new links will be hidden at first. You need to verify them before they are -visible in your list. \ No newline at end of file +visible in your list. + +Syntax of the email body: + +absolute-link|multiple,category,strings|multiple,tag,strings\n +new-absolute-link|multiple,category,strings|multiple,tag,strings\n diff --git a/webroot/job/email-import.php b/webroot/job/email-import.php index f599062..1d2ed4f 100644 --- a/webroot/job/email-import.php +++ b/webroot/job/email-import.php @@ -65,20 +65,29 @@ $DB->query("SET collation_connection = 'utf8mb4_bin'"); # the email reader $EmailReader = new SimpleImap(); +$emaildata = array(); try { $EmailReader->connect(); #if(DEBUG === true) {$EmailReader->mailboxStatus();} } catch (Exception $e) { error_log('Email server connection failed: '.var_export($e->getMessage(),true)); + exit(); } try { - $EmailReader->process(); + $emaildata = $EmailReader->bodyFromMessagesWithSubject(EMAIL_MARKER); } catch (Exception $e) { error_log('Can not process email messages: '.var_export($e->getMessage(),true)); + exit(); } +# process the emaildata and then move the emails +var_dump($emaildata); + +# move them to the processed / archive folder +#$EmailReader->move() + $DB->close(); # END \ No newline at end of file diff --git a/webroot/job/error.log b/webroot/job/error.log index 4174bee..c4ba716 100644 --- a/webroot/job/error.log +++ b/webroot/job/error.log @@ -71,3 +71,11 @@ Stack trace: [20-Aug-2017 21:06:15 Europe/Berlin] PHP Notice: Trying to get property of non-object in /home/banana/code/insipid/webroot/lib/simple-imap.class.php on line 77 [20-Aug-2017 21:06:15 Europe/Berlin] PHP Notice: Trying to get property of non-object in /home/banana/code/insipid/webroot/lib/simple-imap.class.php on line 77 [20-Aug-2017 21:06:15 Europe/Berlin] PHP Notice: Trying to get property of non-object in /home/banana/code/insipid/webroot/lib/simple-imap.class.php on line 77 +[21-Aug-2017 21:35:20 Europe/Berlin] PHP Warning: imap_utf7_decode(): Invalid modified UTF-7 character: ` ' in /home/banana/code/insipid/webroot/lib/simple-imap.class.php on line 168 +[21-Aug-2017 21:35:20 Europe/Berlin] PHP Warning: imap_utf7_decode(): Invalid modified UTF-7 character: ` ' in /home/banana/code/insipid/webroot/lib/simple-imap.class.php on line 168 +[21-Aug-2017 21:57:07 Europe/Berlin] Read 19 messages +[21-Aug-2017 21:57:07 Europe/Berlin] Processed 2 messages +[21-Aug-2017 21:57:33 Europe/Berlin] Read 19 messages +[21-Aug-2017 21:57:33 Europe/Berlin] Processed 2 messages +[21-Aug-2017 21:58:00 Europe/Berlin] Read 19 messages +[21-Aug-2017 21:58:00 Europe/Berlin] Processed 2 messages diff --git a/webroot/lib/simple-imap.class.php b/webroot/lib/simple-imap.class.php index 7ff2be9..f2a21de 100644 --- a/webroot/lib/simple-imap.class.php +++ b/webroot/lib/simple-imap.class.php @@ -30,8 +30,6 @@ class SimpleImap { imap_close($this->_connection); } - - /** * connect to the e-mail server * with this code SSL/TLS only @@ -60,50 +58,40 @@ class SimpleImap { /** * process the given mailbox and check for the special messages + * return the bodies from the found messages as an array + * @param string $subjectmarker */ - function process() { + function bodyFromMessagesWithSubject($subjectmarker) { + $ret = array(); + $messagecount = imap_num_msg($this->_connection); if($messagecount === false) { throw new Exception('Can not read the messages in given mailbox'); } - $messages = array(); - + $processedmessagescount = 0; for($i = 1; $i <= $messagecount; $i++) { - $subject = ''; - - # first we check the header. - - # extract the subject.... - $headerinfo = imap_rfc822_parse_headers(imap_fetchheader($this->_connection, $i)); - $subjectArr = imap_mime_header_decode($headerinfo->subject); - foreach ($subjectArr as $el) { - $subject .= $el->text; - } + $subject = $this->_extractSubject($i); if(!empty($subject)) { # check the special stuff $markerextract = substr($subject, 0, strlen(EMAIL_MARKER)); if($markerextract == EMAIL_MARKER) { + $processedmessagescount++; # valid message - var_dump($subject); + # get the body + $ret[$i] = $this->_extractBody($i); } } - - /* - $messages[] = array( - 'index' => $i, - 'header' => $headerinfo, - 'body' => imap_qprint(imap_body($this->_connection, $i)), - 'structure' => imap_fetchstructure($this->_connection, $i) - ); - */ } # log messages processed to all messages + error_log("Read ".$messagecount." messages"); + error_log("Processed ".$processedmessagescount." messages"); + + return $ret; - #var_dump($messages); } /** @@ -115,6 +103,7 @@ class SimpleImap { public function mailboxStatus() { if($this->_connection !== false) { $status = imap_status($this->_connection, $this->_connectionstring.$this->_mailbox, SA_ALL); + var_dump("messages ".$status->messages); var_dump("recent ".$status->recent); var_dump("unseen ".$status->unseen); @@ -136,6 +125,75 @@ class SimpleImap { } + /** + * extract the subject from the email headers and decode + * A subject can be split into multiple parts... + * + * @param int $messagenum + * @return string + */ + private function _extractSubject($messagenum) { + $ret = ''; + + $headerinfo = imap_rfc822_parse_headers(imap_fetchheader($this->_connection, $messagenum)); + $subjectArr = imap_mime_header_decode($headerinfo->subject); + foreach ($subjectArr as $el) { + $ret .= $el->text; + } + + return $ret; + } + + /** + * extract the body of the given message + * @see http://php.net/manual/en/function.imap-fetchstructure.php + * + * @param int $messagenum + * @return string + */ + private function _extractBody($messagenum) { + $ret = ''; + + $emailstructure = imap_fetchstructure($this->_connection, $messagenum); + + # simple or multipart? + if(isset($emailstructure->parts)) { + exit("multipart todo"); + } + else { + $body = imap_body($this->_connection, $messagenum); + } + + # encoding + switch ($emailstructure->encoding) { + case ENC8BIT: # 1 8BIT + $ret = quoted_printable_decode(imap_8bit($body)); + break; + + case ENCBINARY: # 2 BINARY + $ret = imap_binary($body); + break; + + case ENCBASE64: # 3 BASE64 + $ret = imap_base64($body); + break; + + case ENCQUOTEDPRINTABLE: # 4 QUOTED-PRINTABLE + $ret = quoted_printable_decode($body); + break; + + case ENC7BIT: # 0 7BIT + $ret = imap_qprint($body); + break; + + case ENCOTHER: # 5 OTHER + + default: # UNKNOWN + $ret = $body; + } + + return $ret; + } // move the message to a new folder -- 2.39.5