From c999fb728f8d990297b66ae8c90bc2e237fe1e0e Mon Sep 17 00:00:00 2001 From: Banana Date: Sun, 10 Dec 2023 18:52:41 +0100 Subject: [PATCH] updated PHP-IMDB-Grabber --- CHANGELOG | 4 +- sources/imdb.class.php.txt | 938 +++++++++++++++++++++++--- webclient/lib/imdbwebparser.class.php | 63 +- 3 files changed, 892 insertions(+), 113 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 311f4f2..d26def9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,7 +7,9 @@ * Fixed: #26 98 theme. max column with * Fixed: #25 98 design. Sort direction select does not show the correct selected option * Fixed: #23 basic search result order - * Fixed: Buledit in 98 theme and default + * Fixed: Bulkedit in 98 theme and default + * Updated: PHP-IMDB-Grabber to https://github.com/FabianBeiner/PHP-IMDB-Grabber/releases/tag/v6.2.1 + With some heavy modifications 1.5 - Sacred Passage 2023-05-01 * Added google books parser. See upgrade file for more infos. diff --git a/sources/imdb.class.php.txt b/sources/imdb.class.php.txt index 981349b..86a8a55 100644 --- a/sources/imdb.class.php.txt +++ b/sources/imdb.class.php.txt @@ -13,7 +13,7 @@ * @author Fabian Beiner * @license https://opensource.org/licenses/MIT The MIT License * @link https://github.com/FabianBeiner/PHP-IMDB-Grabber/ GitHub Repository - * @version 6.1.7 + * @version 6.2.0 */ class IMDB { @@ -27,6 +27,23 @@ class IMDB */ const IMDB_LANG = 'en-US,en;q=0.9'; + /** + * Set this to true if you want to start with normal search and + * if you get no result, it will use the advanced method + */ + const IMDB_SEARCH_ORIGINAL = true; + + /** + * Set this to true if you want to search for exact titles + * it falls back to false if theres no result + */ + const IMDB_EXACT_SEARCH = true; + + /** + * Set the sensitivity for search results in percentage. + */ + const IMDB_SENSITIVITY = 85; + /** * Define the timeout for cURL requests. 
*/ @@ -45,9 +62,12 @@ class IMDB const IMDB_CERTIFICATION = '~]*>\s*Certification\s*\s*(.+)~Ui'; const IMDB_CHAR = '~(?:\s+)
(.*)(?:\s+)(?: /| \(.*\)|<\/div>)~Ui'; const IMDB_COLOR = '~(.*)<\/a>~Ui'; - const IMDB_COMPANY = '~href="[^"]*update=[t0-9]+:production_companies[^"]*">Edit\s*\s*.+(.*)~Ui'; + const IMDB_COMPANIES = '~production_companies&ref_=(?:.*)">Edit\s+\s+
    (.*)Distributors~Uis'; + const IMDB_COMPANY = '~
  • \s+(.*?)~'; const IMDB_COUNTRY = '~(.*)~Ui'; const IMDB_CREATOR = '~]*>\s*(?:Creator|Creators)\s*:\s*]*>(.+)
~Uxsi'; + const IMDB_DISTRIBUTOR = '@href="[^"]*update=[t0-9]+:distributors[^"]*">Edit\s*\s*(.*):special_effects_companies@Uis'; + const IMDB_DISTRIBUTORS = '@\/company\/(co[0-9]+)\/">(.*?)<\/a>\s+(?:\(([0-9]+)\))?\s+(?:\((.*?)\))?\s+(?:\((.*?)\))?\s+(?:\((?:.*?)\))?\s+@'; const IMDB_DIRECTOR = '~]*>\s*(?:Director|Directors)\s*:\s*]*>(.+)~Uxsi'; const IMDB_GENRE = '~href="/genre/([a-zA-Z_-]*)/?">([a-zA-Z_ -]*)~Ui'; const IMDB_GROSS = '~pl-zebra-list__label">Cumulative Worldwide Gross<\/td>\s+\s+(.*)\s+<~Uxsi'; @@ -56,23 +76,32 @@ class IMDB const IMDB_LOCATION = '~href="\/search\/title\?locations=(.*)">(.*)<\/a>~Ui'; const IMDB_LOCATIONS = '~href="\/search\/title\?locations=[^>]*>\s?(.*)\s?<\/a>[^"]*
\s?(.*)\s<\/dd>~Ui'; const IMDB_MPAA = '~
  • (?:\s+)(TV-Y|TV-Y7|TV-G|TV-PG|TV-14|TV-MA|G|PG|PG-13|R|NC-17|NR|UR)(?:\s+)<\/li>~Ui'; + const IMDB_MUSIC = '~Music by\s*<\/h4>.*(.*)
    ~Us'; const IMDB_NAME = '~href="/name/(.+)/?(?:\?[^"]*)?"[^>]*>(.+)~Ui'; - const IMDB_DESCRIPTION = '~
    \s+
    (.*)
    \s+
    ~Ui'; - const IMDB_NOT_FOUND = '~

    No results found for ~Ui'; - const IMDB_PLOT = '~]*>\s*Plot\s*Summary\s*\s*\s*

    (.+)

    ~Ui'; + const IMDB_MOVIE_DESC = '~
    \s+
    \s+(.*)\s*?
    \s+
    \s+
    ~Ui'; + const IMDB_SERIES_DESC = '~
    \s+(?:.*?\s+\s+
    \s+
    \s+
    \s+)(.*)\s+
    \s+
    \s+
    ~Ui'; + const IMDB_SERIESEP_DESC = '~All Episodes(?:.*?)

  • \s+(?:.*?)?\s+\s+
    \s+
    \s+
    \s+(.*?)\s+
    \s+
    ~'; + const IMDB_NOT_FOUND_ADV = '~No results.~Ui'; + const IMDB_NOT_FOUND_DES = 'Know what this is about'; + const IMDB_NOT_FOUND_ORG = '~

    No results found for ~Ui'; + const IMDB_PLOT = '~]*>\s*Plot\s*Summary\s*\s*\s*

    \s*(.*)\s*

    ~Ui'; const IMDB_PLOT_KEYWORDS = '~]*>Plot\s*Keywords\s*(.+)(?:]*>[^<]*\s*\s*\s*)?~Ui'; const IMDB_POSTER = '~~Ui'; const IMDB_RATING = '~class="ipl-rating-star__rating">(.*)<~Ui'; const IMDB_RATING_COUNT = '~class="ipl-rating-star__total-votes">\((.*)\)<~Ui'; const IMDB_RELEASE_DATE = '~href="/title/[t0-9]*/releaseinfo">(.*)<~Ui'; const IMDB_RUNTIME = '~]*>\s*Runtime\s*\s*(.+)~Ui'; - const IMDB_SEARCH = '~ (?:.*)<\/a>~Ui'; + const IMDB_SEARCH_ADV = '~text-primary">1[.]\s*(?:.*?)<\/a>~Ui'; + const IMDB_SEARCH_ORG = '~find-title-result">(?:.*?)alt="(.*?)"(?:.*?)href="\/title\/(tt\d{6,})\/(?:.*?)">(.*?)<\/a>~'; const IMDB_SEASONS = '~episodes\?season=(?:\d+)">(\d+)<~Ui'; const IMDB_SOUND_MIX = '~]*>\s*Sound\s*Mix\s*\s*(.+)~Ui'; const IMDB_TAGLINE = '~]*>\s*Taglines\s*\s*(.+)~Ui'; const IMDB_TITLE = '~itemprop="name">(.*)(<\/h3>| 0) { $this->iCache = (int) $iCache; } - $this->fetchUrl($sSearch); + + if (self::IMDB_EXACT_SEARCH) { + if ($this->fetchUrl($sSearch, self::IMDB_SEARCH_ORIGINAL, true)) { + return true; + } + } + + if ($this->fetchUrl($sSearch, self::IMDB_SEARCH_ORIGINAL)) { + return true; + } + + if ($this->fetchUrl($sSearch, !self::IMDB_SEARCH_ORIGINAL)) { + return true; + } + + } /** @@ -180,7 +225,7 @@ class IMDB * * @return bool True on success, false on failure. */ - private function fetchUrl($sSearch) + private function fetchUrl($sSearch, $orgSearch = false, $exactSearch = false) { $sSearch = trim($sSearch); @@ -191,24 +236,66 @@ class IMDB $this->sUrl = 'https://www.imdb.com/title/tt' . $this->iId . '/reference'; $bSearch = false; } else { - switch (strtolower($this->sSearchFor)) { - case 'movie': - $sParameters = '&s=tt&ttype=ft'; - break; - case 'tv': - $sParameters = '&s=tt&ttype=tv'; - break; - case 'episode': - $sParameters = '&s=tt&ttype=ep'; - break; - case 'game': - $sParameters = '&s=tt&ttype=vg'; - break; - default: - $sParameters = '&s=tt'; - } - - $this->sUrl = 'https://www.imdb.com/find?q=' . 
rawurlencode(str_replace(' ', '+', $sSearch)) . $sParameters; + if (!$orgSearch) { + switch (strtolower($this->sSearchFor)) { + case 'movie': + $sParameters = '&title_type=feature'; + break; + case 'tv': + $sParameters = '&title_type=tv_movie,tv_series,tv_special,tv_miniseries'; + break; + case 'episode': + $sParameters = '&title_type=tv_episode'; + break; + case 'game': + $sParameters = '&title_type=video_game'; + break; + case 'documentary': + $sParameters = '&title_type=documentary'; + break; + case 'video': + $sParameters = '&title_type=video'; + break; + default: + $sParameters = ''; + } + + if (preg_match('~([^0-9+])\(?([0-9]{4})\)?~', $sSearch, $fMatch)) { + $sParameters .= '&release_date=' . $fMatch[2] . '-01-01,' . $fMatch[2] . '-12-31'; + $sSearch = preg_replace('~([^0-9+])\(?([0-9]{4})\)?~','', $sSearch); + } + + $this->sUrl = 'https://www.imdb.com/search/title/?title=' . rawurlencode(str_replace(' ', '+', $sSearch)) . $sParameters; + } else { + switch (strtolower($this->sSearchFor)) { + case 'movie': + $sParameters = '&s=tt&ttype=ft'; + break; + case 'tv': + $sParameters = '&s=tt&ttype=tv'; + break; + case 'episode': + $sParameters = '&s=tt&ttype=ep'; + break; + case 'game': + $sParameters = '&s=tt&ttype=vg'; + break; + default: + $sParameters = '&s=tt'; + } + + if (preg_match('~([^0-9+])\(?([0-9]{4})\)?~', $sSearch, $fMatch)) { + $sYear = $fMatch[2]; + $sTempSearch = preg_replace('~([^0-9+])\(?([0-9]{4})\)?~','', $sSearch); + $sSearch = $sTempSearch . ' (' . $sYear . ')'; + } + + if ($exactSearch) { + $sParameters .= '&exact=true'; + } + $this->sUrl = 'https://www.imdb.com/find/?q=' . rawurlencode(str_replace(' ', ' ', $sSearch)) . $sParameters; + } + $bSearch = true; // Was this search already performed and cached? @@ -225,17 +312,19 @@ class IMDB } // Does a cache of this movie exist? - $sCacheFile = $this->sRoot . '/cache/' . sha1($this->iId) . 
'.cache'; - if (is_readable($sCacheFile)) { - $iDiff = round(abs(time() - filemtime($sCacheFile)) / 60); - if ($iDiff < $this->iCache) { - if (true === self::IMDB_DEBUG) { - echo '
    Using cache: ' . basename($sCacheFile) . '
    '; - } - $this->sSource = file_get_contents($sCacheFile); - $this->isReady = true; + if (! is_null($this->iId)) { + $sCacheFile = $this->sRoot . '/cache/' . sha1($this->iId) . '.cache'; + if (is_readable($sCacheFile)) { + $iDiff = round(abs(time() - filemtime($sCacheFile)) / 60); + if ($iDiff < $this->iCache) { + if (true === self::IMDB_DEBUG) { + echo '
    Using cache: ' . basename($sCacheFile) . '
    '; + } + $this->sSource = file_get_contents($sCacheFile); + $this->isReady = true; - return true; + return true; + } } } @@ -255,23 +344,122 @@ class IMDB return false; } - // Was the movie found? - $sMatch = IMDBHelper::matchRegex($sSource, self::IMDB_SEARCH, 1); - if (false !== $sMatch) { - $sUrl = 'https://www.imdb.com/title/' . $sMatch . '/reference'; - if (true === self::IMDB_DEBUG) { - echo '
    New redirect saved: ' . basename($sRedirectFile) . ' => ' . $sUrl . '
    '; + if (!$orgSearch) { + // Was the movie found? + $sMatch = IMDBHelper::matchRegex($sSource, self::IMDB_SEARCH_ADV, 1); + if (false !== $sMatch) { + $sUrl = 'https://www.imdb.com/title/' . $sMatch . '/reference'; + if (true === self::IMDB_DEBUG) { + echo '
    New redirect saved: ' . basename($sRedirectFile) . ' => ' . $sUrl . '
    '; + } + file_put_contents($sRedirectFile, $sUrl); + $this->sSource = null; + self::fetchUrl($sUrl); + + return true; + } + $sMatch = IMDBHelper::matchRegex($sSource, self::IMDB_NOT_FOUND_ADV, 0); + if (false !== $sMatch) { + if (true === self::IMDB_DEBUG) { + echo '
    Movie not found: ' . $sSearch . '
    '; + } + + return false; } - file_put_contents($sRedirectFile, $sUrl); - $this->sSource = null; - self::fetchUrl($sUrl); + } else { + $aReturned = IMDBHelper::matchRegex($sSource, self::IMDB_SEARCH_ORG); - return true; - } - $sMatch = IMDBHelper::matchRegex($sSource, self::IMDB_NOT_FOUND, 0); - if (false !== $sMatch) { - if (true === self::IMDB_DEBUG) { - echo '
    Movie not found: ' . $sSearch . '
    '; + if ($aReturned) { + $rData = []; + $fTempPercent = 0.00; + $iTempId = ""; + $sYear = 0; + + if (preg_match('~([^0-9+])\(?([0-9]{4})\)?~', $sSearch, $fMatch)) { + $sYear = $fMatch[2]; + $sTempSearch = preg_replace('~([^0-9+])\(?([0-9]{4})\)?~','', $sSearch); + if (true === self::IMDB_DEBUG) { + echo '
    YEAR: ' . $sTempSearch . ' =>  ' . $sYear . '
    '; + } + } + + foreach ($aReturned[1] as $i => $value) { + $sId = $aReturned[2][$i]; + $sTitle = $aReturned[3][$i]; + $perc = 0.00; + $year = 0; + + if ($sYear === 0) { + $sim = similar_text($sSearch, $sTitle, $perc); + } else { + $sMatch = IMDBHelper::matchRegex($aReturned[1][$i], '~\(?([0-9]{4})\)?~', 1); + if (false !== $sMatch) { + $year = $sMatch; + } + + if ($sYear != $year) { + continue; + } + + $sim = similar_text($sTempSearch, $sTitle, $perc); + } + + $rData[] = [ + 'id' => $sId, + 'title' => $sTitle, + 'year' => $year, + 'match' => floatval($perc) + ]; + + + } + + if (sizeof($rData) === 0) { + return false; + } + + if (true === self::IMDB_DEBUG) { + foreach ($rData as $sArray) { + echo '
    Found results: ' . $sArray['id'] . ' =>  ' . $sArray['title'] . ' (' . $sArray['match']. '%) 
    '; + } + } + + //get highest match of search results + $matches = array_column($rData, 'match'); + $maxv = max($matches); + + $marray = array_filter($rData, function($item) use ($maxv) { + return $item['match'] == $maxv; + }); + + $marray = reset($marray); + + if (sizeof($marray) > 0) { + if (!$exactSearch && round($marray['match'], 0) < self::IMDB_SENSITIVITY) { + echo '
    Bad sensitivity: ' . $marray['id'] . ' =>  ' . $marray['title'] . ' (' . $marray['match']. '%) 
    '; + return false; + } + + $sUrl = 'https://www.imdb.com/title/' . $marray['id'] . '/reference'; + if (true === self::IMDB_DEBUG) { + echo '
    Get best result: ' . $marray['title'] . ' ' . $marray['id'] . ' =>  ' . $marray['match'] . '% 
    '; + echo '
    New redirect saved: ' . basename($sRedirectFile) . ' => ' . $sUrl . '
    '; + } + file_put_contents($sRedirectFile, $sUrl); + $this->sSource = null; + self::fetchUrl($sUrl); + + return true; + } + } + + $sMatch = IMDBHelper::matchRegex($sSource, self::IMDB_NOT_FOUND_ORG, 0); + if (false !== $sMatch) { + if (true === self::IMDB_DEBUG) { + echo '
    Movie not found: ' . $sSearch . '
    '; + } + + return false; } return false; @@ -324,13 +512,16 @@ class IMDB public function getAka() { if (true === $this->isReady) { + $aReturn = []; $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_AKA, 1); if (false !== $sMatch) { - return IMDBHelper::cleanString($sMatch); + $aReturn[] = explode('|', IMDBHelper::cleanString($sMatch)); + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aReturn); } } - return self::$sNotFound; + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound); } /** @@ -360,7 +551,7 @@ class IMDB } else { $fullAkas = sprintf('https://www.imdb.com/title/tt%s/releaseinfo', $this->iId); $aCurlInfo = IMDBHelper::runCurl($fullAkas); - $sSource = $aCurlInfo['contents']; + $sSource = $aCurlInfo['contents'] ?? false; if (false === $sSource) { if (true === self::IMDB_DEBUG) { @@ -370,11 +561,11 @@ class IMDB return false; } - $aReturned = IMDBHelper::matchRegex($sSource, "~(.*?)<\/td>\s+(.*?)<\/td>~"); + $aReturned = IMDBHelper::matchRegex($sSource, "~(.*?)<\/td>\s+(.*?)<\/td>~"); if ($aReturned) { $aReturn = []; - foreach ($aReturned[1] as $i => $strName) { + foreach ($aReturned[1] ?? [] as $i => $strName) { if (strpos($strName, '(') === false) { $aReturn[] = [ 'title' => IMDBHelper::cleanString($aReturned[2][$i]), @@ -393,6 +584,135 @@ class IMDB return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound); } + /** + * Returns meta score + * + * @return string metascore + * @return string reviews + */ + public function getMetaScore() + { + if (true === $this->isReady) { + // Does a cache of this movie exist? + $sCacheFile = $this->sRoot . '/cache/' . sha1($this->iId) . 
'_metascore.cache'; + $bUseCache = false; + + if (is_readable($sCacheFile)) { + $iDiff = round(abs(time() - filemtime($sCacheFile)) / 60); + if ($iDiff < $this->iCache || false) { + $bUseCache = true; + } + } + + if ($bUseCache) { + $aRawReturn = file_get_contents($sCacheFile); + $aReturn = unserialize($aRawReturn); + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aReturn); + } else { + $fullCritics = sprintf('https://www.imdb.com/title/tt%s/criticreviews', $this->iId); + $aCurlInfo = IMDBHelper::runCurl($fullCritics); + $sSource = $aCurlInfo['contents'] ?? false; + + if (false === $sSource) { + if (true === self::IMDB_DEBUG) { + echo '
    cURL error: ' . var_dump($aCurlInfo) . '
    '; + } + + return false; + } + + $aReturned = IMDBHelper::matchRegex( + $sSource, + '~metascore_wrap(?:.*)\s+(?:.*)\s+(?:.*)ratingValue\">([0-9]+)<\/span>(?:\s+(?:.*)){4}ratingCount\">([0-9]+)~' + ); + + if ($aReturned) { + $aReturn = []; + $aReturn[] = [ + 'metascore' => IMDBHelper::cleanString($aReturned[1][0]), + 'reviews' => IMDBHelper::cleanString($aReturned[2][0]), + ]; + + file_put_contents($sCacheFile, serialize($aReturn)); + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aReturn); + } + } + } + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound); + } + + /** + * Returns Critic Reviews based on Metascore + * + * @return string rating + * @return string url + * @return string publisher + * @return string author + * @return string review + */ + + public function getMetaCritics() + { + if (true === $this->isReady) { + // Does a cache of this movie exist? + $sCacheFile = $this->sRoot . '/cache/' . sha1($this->iId) . '_criticreviews.cache'; + $bUseCache = false; + + if (is_readable($sCacheFile)) { + $iDiff = round(abs(time() - filemtime($sCacheFile)) / 60); + if ($iDiff < $this->iCache || false) { + $bUseCache = true; + } + } + + if ($bUseCache) { + $aRawReturn = file_get_contents($sCacheFile); + $aReturn = unserialize($aRawReturn); + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aReturn); + } else { + $fullCritics = sprintf('https://www.imdb.com/title/tt%s/criticreviews', $this->iId); + $aCurlInfo = IMDBHelper::runCurl($fullCritics); + $sSource = $aCurlInfo['contents'] ?? false; + + if (false === $sSource) { + if (true === self::IMDB_DEBUG) { + echo '
    cURL error: ' . var_dump($aCurlInfo) . '
    '; + } + + return false; + } + + $aReturned = IMDBHelper::matchRegex( + $sSource, + '~"ratingValue\">([0-9]+)<\/span>\s+<\/div>\s+<\/td>\s+(?:.*)\s+(?:(.*)<\/span>(?:.*)\"name\">(.*)<\/span><\/span>(?:<\/a>)?\s+(?:.*)\"reviewbody\"> (.*)<\/div>~' + ); + + if ($aReturned) { + $aReturn = []; + foreach ($aReturned[1] as $i => $strScore) { + $aReturn[] = [ + 'rating' => IMDBHelper::cleanString($strScore), + 'url' => IMDBHelper::cleanString($aReturned[2][$i]), + 'publisher' => IMDBHelper::cleanString($aReturned[3][$i]), + 'author' => IMDBHelper::cleanString($aReturned[4][$i]), + 'review' => IMDBHelper::cleanString($aReturned[5][$i]), + ]; + } + + file_put_contents($sCacheFile, serialize($aReturn)); + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aReturn); + } + } + } + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound); + } + /** * @return string “Aspect Ratio” or $sNotFound. */ @@ -595,11 +915,12 @@ class IMDB if (0 !== $iLimit && $i >= $iLimit) { break; } + $sChar = str_replace(' / ', ' and ', $aMatchChar[1][$i]); $aReturn[] = '
    ' . IMDBHelper::cleanString( $sName - ) . ' as ' . IMDBHelper::cleanString($aMatchChar[1][$i]); + ) . ' as ' . IMDBHelper::cleanString($sChar); } $bHaveMore = ($bMore && (count($aMatch[2]) > $iLimit)); @@ -635,7 +956,8 @@ class IMDB if (0 !== $iLimit && $i >= $iLimit) { break; } - $aReturn[] = IMDBHelper::cleanString($sName) . ' as ' . IMDBHelper::cleanString($aMatchChar[1][$i]); + $sChar = str_replace(' / ', ' and ', $aMatchChar[1][$i]); + $aReturn[] = IMDBHelper::cleanString($sName) . ' as ' . IMDBHelper::cleanString($sChar); } $bHaveMore = ($bMore && (count($aMatch[2]) > $iLimit)); @@ -659,13 +981,16 @@ class IMDB public function getCertification() { if (true === $this->isReady) { + $aReturn = []; $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_CERTIFICATION, 1); if (false !== $sMatch) { - return IMDBHelper::cleanString($sMatch); + $aReturn[] = explode('|', IMDBHelper::cleanString($sMatch)); + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aReturn); } } - return self::$sNotFound; + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound); } /** @@ -706,17 +1031,27 @@ class IMDB public function getCompanyAsUrl($sTarget = '') { if (true === $this->isReady) { - $aMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_COMPANY); - if (isset($aMatch[2][0])) { - return '' . IMDBHelper::cleanString( - $aMatch[2][0] - ) . ''; + $aMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_COMPANIES); + $aReturn = []; + if (isset($aMatch[1][0])) { + $bMatch = IMDBHelper::matchRegex($aMatch[1][0], self::IMDB_COMPANY); + if (count($bMatch[2])) { + foreach ($bMatch[2] as $i => $sName) { + + $aReturn[] = '' . IMDBHelper::cleanString( + $sName + ) . 
''; + + } + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aReturn); + } } } - return self::$sNotFound; + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound); } /** @@ -808,10 +1143,29 @@ class IMDB public function getDescription() { if (true === $this->isReady) { - $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_DESCRIPTION, 1); + + $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_SERIESEP_DESC, 1); if (false !== $sMatch) { - return IMDBHelper::cleanString($sMatch); + if (strpos($sMatch, self::IMDB_NOT_FOUND_DES) === false) { + return IMDBHelper::cleanString($sMatch); + } + } + + $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_SERIES_DESC, 1); + if (false !== $sMatch) { + if (strpos($sMatch, self::IMDB_NOT_FOUND_DES) === false) { + return IMDBHelper::cleanString($sMatch); + } + } + + $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_MOVIE_DESC, 1); + if (false !== $sMatch) { + if (strpos($sMatch, self::IMDB_NOT_FOUND_DES) === false) { + return IMDBHelper::cleanString($sMatch); + } + } + } return self::$sNotFound; @@ -859,6 +1213,93 @@ class IMDB return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound); } + /** + * @param string $sTarget Add a target to the links? + * + * @return array A list (name, url, year, country, type) with Distributors or $sNotFound. + */ + public function getDistributor($iLimit = 0, $bMore = true) + { + if (true === $this->isReady) { + $aMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_DISTRIBUTOR); + $aReturn = []; + if (isset($aMatch[1][0])) { + $bMatch = IMDBHelper::matchRegex($aMatch[1][0], self::IMDB_DISTRIBUTORS); + if (count($bMatch[2])) { + foreach ($bMatch[2] as $i => $sName) { + if (0 !== $iLimit && $i >= $iLimit) { + break; + } + $aReturn[] = [ + 'distributor' => IMDBHelper::cleanString($sName), + 'url' => 'https://www.imdb.com/company/' . 
IMDBHelper::cleanString($bMatch[1][$i]) .'', + 'year' => IMDBHelper::cleanString($bMatch[3][$i]), + 'country' => IMDBHelper::cleanString($bMatch[4][$i]), + 'type' => IMDBHelper::cleanString($bMatch[5][$i]), + ]; + } + + $bMore = (0 !== $iLimit && $bMore && (count($aMatch[2]) > $iLimit) ? '…' : ''); + + $bHaveMore = ($bMore && (count($aMatch[2]) > $iLimit)); + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aReturn, $bHaveMore); + + } + } + } + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound); + } + + /** + * @param string $sTarget Add a target to the links? + * + * @return string A list with the linked distributors or $sNotFound. + */ + public function getDistributorAsUrl($sTarget = '') + { + if (true === $this->isReady) { + $aMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_DISTRIBUTOR); + $aReturn = []; + if (isset($aMatch[1][0])) { + $bMatch = IMDBHelper::matchRegex($aMatch[1][0], self::IMDB_DISTRIBUTORS); + if (count($bMatch[2])) { + foreach ($bMatch[2] as $i => $sName) { + + $aReturn[] = '' . IMDBHelper::cleanString( + $sName + ) . ''; + + } + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aReturn); + } + } + } + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound); + } + + /** + * @return string The episode title of the tv show or $sNotFound. + */ + public function getEpisodeTitle() + { + if (true === $this->isReady) { + if (preg_match('/Episode/i', $this->getType())) { + $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_TITLE_EP, 1); + if (false !== $sMatch && "" !== $sMatch) { + return IMDBHelper::cleanString($sMatch); + } + } + } + + return self::$sNotFound; + } + /** * @return string A list with the genres or $sNotFound. 
*/ @@ -1025,7 +1466,7 @@ class IMDB } else { $fullLocations = sprintf('https://www.imdb.com/title/tt%s/locations', $this->iId); $aCurlInfo = IMDBHelper::runCurl($fullLocations); - $sSource = $aCurlInfo['contents']; + $sSource = $aCurlInfo['contents'] ?? false; if (false === $sSource) { if (true === self::IMDB_DEBUG) { @@ -1077,6 +1518,153 @@ class IMDB return self::$sNotFound; } + /** + * @return string A list with the music composers or $sNotFound. + */ + public function getMusic() + { + if (true === $this->isReady) { + $sMatch = $this->getMusicAsUrl(); + if (self::$sNotFound !== $sMatch) { + return IMDBHelper::cleanString($sMatch); + } + } + + return self::$sNotFound; + } + + /** + * @param string $sTarget Add a target to the links? + * + * @return string A list with the linked music composers or $sNotFound. + */ + public function getMusicAsUrl($sTarget = '') + { + if (true === $this->isReady) { + $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_MUSIC, 1); + $aMatch = IMDBHelper::matchRegex($sMatch, self::IMDB_NAME); + $aReturn = []; + if (count($aMatch[2])) { + foreach ($aMatch[2] as $i => $sName) { + $aReturn[] = '' . IMDBHelper::cleanString( + $sName + ) . ''; + } + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aReturn); + } + } + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound); + } + + /** + * @param int $iLimit How many photo images should be returned? + * @param bool $bMore Add … if there are more cast members than printed. + * @param string $sSize small or big images + * + * @return array Array with title and url. + */ + public function getPhotos($iLimit = 0, $bMore = true, $sSize = 'small') + { + if (true === $this->isReady) { + // Does a cache of this movie exist? + $sCacheFile = $this->sRoot . '/cache/' . sha1($this->iId) . 
'_gallery.cache'; + $bUseCache = false; + + if (is_readable($sCacheFile)) { + $iDiff = round(abs(time() - filemtime($sCacheFile)) / 60); + if ($iDiff < $this->iCache || false) { + $bUseCache = true; + } + } + + if ($bUseCache) { + $aRawReturn = file_get_contents($sCacheFile); + $aReturn = unserialize($aRawReturn); + $anReturn = []; + foreach ($aReturn as $i => $sAreturn) { + if (0 !== $iLimit && $i >= $iLimit) { + break; + } + $title = $sAreturn['title']; + $url = $sAreturn['url']; + + if ('big' === strtolower($sSize) && false !== strstr($url, '._')) { + $url = substr($url, 0, strpos($url, '._')) . '.jpg'; + } + + $anReturn[] = [ + 'title' => $title, + 'url' => $url, + ]; + } + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $anReturn); + + } else { + $isPage = true; + $aReturn = []; + $page = 1; + while ($isPage) { + $fullPhotos = sprintf('https://www.imdb.com/title/tt%s/mediaindex?page=%d', $this->iId, $page); + $aCurlInfo = IMDBHelper::runCurl($fullPhotos); + $sSource = $aCurlInfo['contents']; + + if (false === $sSource) { + if (true === self::IMDB_DEBUG) { + echo '
    cURL error: ' . var_dump($aCurlInfo) . '
    '; + } + + return false; + } + + $aReturned = IMDBHelper::matchRegex($sSource, '~title="(.*?)"\s+>~'); + + if ($aReturned) { + + foreach ($aReturned[1] as $i => $strName) { + $aReturn[] = [ + 'title' => IMDBHelper::cleanString($strName), + 'url' => IMDBHelper::cleanString($aReturned[2][$i]), + ]; + } + } + + file_put_contents($sCacheFile, serialize($aReturn)); + if (!preg_match('~class="prevnext"\s>Next~', $sSource)) { + $isPage = false; + } + + $page++; + } + + $anReturn = []; + foreach ($aReturn as $i => $sAreturn) { + if (0 !== $iLimit && $i >= $iLimit) { + break; + } + $title = $sAreturn['title']; + $url = $sAreturn['url']; + + if ('big' === strtolower($sSize) && false !== strstr($url, '._')) { + $url = substr($url, 0, strpos($url, '._')) . '.jpg'; + } + + $anReturn[] = [ + 'title' => $title, + 'url' => $url, + ]; + } + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $anReturn); + } + } + + return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound); + } + /** * @return string A list with the plot keywords or $sNotFound. */ @@ -1126,17 +1714,17 @@ class IMDB if (true === $this->isReady) { $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_POSTER, 1); if (false !== $sMatch) { - if ('big' === strtolower($sSize) && false !== strstr($sMatch, '@._')) { - $sMatch = substr($sMatch, 0, strpos($sMatch, '@._')) . '@.jpg'; + if ('big' === strtolower($sSize) && false !== strstr($sMatch, '._')) { + $sMatch = substr($sMatch, 0, strpos($sMatch, '._')) . '.jpg'; } - if ('xxs' === strtolower($sSize) && false !== strstr($sMatch, '@._')) { - $sMatch = substr($sMatch, 0, strpos($sMatch, '@._')) . '@._V1_UY67_CR0,0,45,67_AL_.jpg'; + if ('xxs' === strtolower($sSize) && false !== strstr($sMatch, '._')) { + $sMatch = substr($sMatch, 0, strpos($sMatch, '._')) . 
'._V1_UY67_CR0,0,45,67_AL_.jpg'; } - if ('xs' === strtolower($sSize) && false !== strstr($sMatch, '@._')) { - $sMatch = substr($sMatch, 0, strpos($sMatch, '@._')) . '@._V1_UY113_CR0,0,76,113_AL_.jpg'; + if ('xs' === strtolower($sSize) && false !== strstr($sMatch, '._')) { + $sMatch = substr($sMatch, 0, strpos($sMatch, '._')) . '._V1_UY113_CR0,0,76,113_AL_.jpg'; } - if ('s' === strtolower($sSize) && false !== strstr($sMatch, '@._')) { - $sMatch = substr($sMatch, 0, strpos($sMatch, '@._')) . '@._V1_UX182_CR0,0,182,268_AL_.jpg'; + if ('s' === strtolower($sSize) && false !== strstr($sMatch, '._')) { + $sMatch = substr($sMatch, 0, strpos($sMatch, '._')) . '._V1_UX182_CR0,0,182,268_AL_.jpg'; } if (false === $bDownload) { return IMDBHelper::cleanString($sMatch); @@ -1225,7 +1813,7 @@ class IMDB { if (true === $this->isReady) { // Does a cache of this movie exist? - $sCacheFile = $this->sRoot . '/cache/' . sha1($this->iId) . '_akas.cache'; + $sCacheFile = $this->sRoot . '/cache/' . sha1($this->iId) . '_dates.cache'; $bUseCache = false; if (is_readable($sCacheFile)) { @@ -1243,7 +1831,7 @@ class IMDB } else { $fullAkas = sprintf('https://www.imdb.com/title/tt%s/releaseinfo', $this->iId); $aCurlInfo = IMDBHelper::runCurl($fullAkas); - $sSource = $aCurlInfo['contents']; + $sSource = $aCurlInfo['contents'] ?? false; if (false === $sSource) { if (true === self::IMDB_DEBUG) { @@ -1255,7 +1843,7 @@ class IMDB $aReturned = IMDBHelper::matchRegex( $sSource, - '~>(.*)<\/a><\/td>\s+(.*)<\/td>~' + '~>(.*)\s+<\/a><\/td>\s+(.*)<\/td>~' ); if ($aReturned) { @@ -1386,6 +1974,21 @@ class IMDB return self::$sNotFound; } + /** + * @return string returns the given position at top-250 or $sNotFound. 
+     */
+    public function getTop250()
+    {
+        if (true === $this->isReady) {
+            $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_TOP250, 1);
+            if (false !== $sMatch) {
+                return IMDBHelper::cleanString($sMatch);
+            }
+        }
+
+        return self::$sNotFound;
+    }
+
     /**
      * @param bool $bEmbed Link to player directly?
      *
@@ -1396,7 +1999,11 @@ class IMDB
         if (true === $this->isReady) {
             $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_TRAILER, 1);
             if (false !== $sMatch) {
-                $sUrl = 'https://www.imdb.com/video/imdb/' . $sMatch . '/' . ($bEmbed ? 'player' : '');
+                if ($bEmbed) {
+                    $sUrl = 'https://www.imdb.com/video/imdb/' . $sMatch . '/imdb/embed';
+                } else {
+                    $sUrl = 'https://www.imdb.com/video/' . $sMatch;
+                }
 
                 return IMDBHelper::cleanString($sUrl);
             }
@@ -1405,6 +2012,114 @@ class IMDB
         return self::$sNotFound;
     }
 
+    /**
+     * Collect the episode list of this title, one season page at a time.
+     *
+     * The result is kept serialized in "<sRoot>/cache/<sha1 of the id>_tv.cache" and served from
+     * there while that file is younger than $this->iCache minutes.
+     *
+     * @return mixed Episode rows (season, episode, title, rating, votes, airdate, plot, id) passed
+     *               through IMDBHelper::arrayOutput(), or false if a season page failed to download.
+     */
+    public function getTVInfo()
+    {
+        if (true !== $this->isReady) {
+            return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound);
+        }
+
+        // Does a cache of this title's episodes exist?
+        $sCacheFile = $this->sRoot . '/cache/' . sha1($this->iId) . '_tv.cache';
+
+        if (is_readable($sCacheFile)) {
+            $iDiff = round(abs(time() - filemtime($sCacheFile)) / 60);
+            if ($iDiff < $this->iCache) {
+                $sRawCache = file_get_contents($sCacheFile);
+                // Plain data only: a cache file must never be able to instantiate objects.
+                $aCached = false === $sRawCache ? false : unserialize($sRawCache, ['allowed_classes' => false]);
+                // A cache that does not decode to an array is ignored and the data is fetched again.
+                if (is_array($aCached)) {
+                    return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aCached);
+                }
+            }
+        }
+
+        $aReturn = [];
+        $page    = 1;
+        while (true) {
+            $fullEpisodes = sprintf('https://www.imdb.com/title/tt%s/episodes/?season=%d', $this->iId, $page);
+
+            $aCurlInfo = IMDBHelper::runCurl($fullEpisodes);
+            $sSource   = $aCurlInfo['contents'];
+
+            if (false === $sSource) {
+                if (true === self::IMDB_DEBUG) {
+                    // print_r(..., true) returns the dump as a string; var_dump() prints it right away
+                    // and returns nothing, so it cannot be used inside a concatenation.
+                    // NOTE(review): the markup inside these literals looks stripped in this copy of the
+                    // patch (they now contain only line breaks) - confirm against upstream.
+                    echo '
    cURL error: ' . print_r($aCurlInfo, true) . '
    ';
+                }
+
+                return false;
+            }
+
+            // NOTE(review): as shown here the pattern has no capture group although $aSplit[1] is read
+            // below; the pattern text looks truncated in this copy - confirm against upstream.
+            $aSplit = IMDBHelper::matchRegex($sSource, '~
    Rate~s');
+
+            if ($aSplit) {
+                foreach ($aSplit[1] as $text) {
+                    $aReturned = IMDBHelper::matchRegex($text, '~h4.+/title/(tt\d+)/[?]ref_.+ttep_ep(\d+).+?S\d+\.E\d+ ∙ (.+?)<\/div>.+?(.+?).+?
    (.+?)
    .+?ratingGroup--imdb-rating.+?(.+?).+?>(.+?)<~s');
+                    if ($aReturned) {
+                        foreach (array_keys($aReturned[1]) as $n) {
+                            $aReturn[] = [
+                                'season'  => $page,
+                                'episode' => IMDBHelper::cleanString($aReturned[2][$n]),
+                                'title'   => IMDBHelper::cleanString($aReturned[3][$n]),
+                                'rating'  => IMDBHelper::cleanString($aReturned[6][$n]),
+                                'votes'   => IMDBHelper::cleanString($aReturned[7][$n]),
+                                'airdate' => IMDBHelper::cleanString($aReturned[4][$n]),
+                                'plot'    => IMDBHelper::cleanString($aReturned[5][$n]),
+                                'id'      => IMDBHelper::cleanString($aReturned[1][$n]),
+                            ];
+                        }
+                    }
+                }
+            }
+
+            // Stop once the page links to "?season=-1" or carries no "load_next_episodes" element;
+            // otherwise continue with the next season number.
+            if (preg_match('~href="\?season=-1~s', $sSource) || !preg_match('~id="load_next_episodes"~', $sSource)) {
+                break;
+            }
+
+            $page++;
+        }
+
+        // Written once, after every page was read: a download failure above must not leave a
+        // partial episode list behind that later calls would serve as if it were complete.
+        file_put_contents($sCacheFile, serialize($aReturn));
+
+        return IMDBHelper::arrayOutput($this->bArrayOutput, $this->sSeparator, self::$sNotFound, $aReturn);
+    }
+
+
+    /**
+     *
+     * @return string type of the title or $sNotFound.
+     */
+    public function getType()
+    {
+        if (true === $this->isReady) {
+            $sMatch = IMDBHelper::matchRegex($this->sSource, self::IMDB_TYPE, 1);
+            if (false !== $sMatch && "" !== $sMatch) {
+                return IMDBHelper::cleanString($sMatch);
+            }
+        }
+
+        return self::$sNotFound;
+    }
+
     /**
      * @return string The IMDb URL.
*/ @@ -1535,7 +2281,7 @@ class IMDBHelper extends IMDB */ public static function matchRegex($sContent, $sPattern, $iIndex = null) { - preg_match_all($sPattern, $sContent, $aMatches); + preg_match_all($sPattern, $sContent ?? '', $aMatches); if ($aMatches === false) { return false; } @@ -1563,7 +2309,7 @@ class IMDBHelper extends IMDB */ public static function arrayOutput($bArrayOutput, $sSeparator, $sNotFound, $aReturn = null, $bHaveMore = false) { - if ($bArrayOutput) { + if ($bArrayOutput ?? false) { if ($aReturn == null || ! is_array($aReturn)) { return []; } @@ -1615,16 +2361,16 @@ class IMDBHelper extends IMDB '', '', ]; - $sInput = str_replace('', ' | ', $sInput); - $sInput = strip_tags($sInput); - $sInput = str_replace(' ', ' ', $sInput); - $sInput = str_replace($aSearch, $aReplace, $sInput); - $sInput = html_entity_decode($sInput, ENT_QUOTES | ENT_HTML5); - $sInput = preg_replace('/\s+/', ' ', $sInput); - $sInput = trim($sInput); - $sInput = rtrim($sInput, ' |'); - - return ($sInput ? trim($sInput) : self::$sNotFound); + $sInput = str_replace('', ' | ', $sInput ?? ''); + $sInput = strip_tags($sInput ?? ''); + $sInput = str_replace(' ', ' ', $sInput ?? ''); + $sInput = str_replace($aSearch, $aReplace, $sInput ?? ''); + $sInput = html_entity_decode($sInput ?? '', ENT_QUOTES | ENT_HTML5); + $sInput = preg_replace('/\s+/', ' ', $sInput ?? ''); + $sInput = trim($sInput ?? ''); + $sInput = rtrim($sInput ?? '', ' |'); + + return ($sInput ? trim($sInput ?? 
'') : self::$sNotFound); } /** diff --git a/webclient/lib/imdbwebparser.class.php b/webclient/lib/imdbwebparser.class.php index b80873e..2feca7e 100644 --- a/webclient/lib/imdbwebparser.class.php +++ b/webclient/lib/imdbwebparser.class.php @@ -12,7 +12,7 @@ * @author Fabian Beiner * @license https://opensource.org/licenses/MIT The MIT License * @link https://github.com/FabianBeiner/PHP-IMDB-Grabber/ GitHub Repository - * @version 6.1.7 + * @version 6.2.0 * * * Functionality is the same but modified heavily to remove the does-not-make-sense static helper @@ -31,6 +31,23 @@ class IMDB */ private string $IMDB_BROWSER_LANG; + /** + * Set this to true if you want to start with normal search and + * if you get no result, it will use the advanced method + */ + const IMDB_SEARCH_ORIGINAL = true; + + /** + * Set this to true if you want to search for exact titles + * it falls back to false if theres no result + */ + const IMDB_EXACT_SEARCH = true; + + /** + * Set the sensitivity for search results in percentage. + */ + const IMDB_SENSITIVITY = 85; + /** * @var string The accept string for curl call */ @@ -53,40 +70,53 @@ class IMDB const IMDB_AKA = '~]*>\s*Also\s*Known\s*As\s*\s*(.+)~Uis'; const IMDB_ASPECT_RATIO = '~]*>Aspect\s*Ratio\s*(.+)~Uis'; const IMDB_AWARDS = '~\s*Awards:(.+)~Uis'; - const IMDB_BUDGET = '~]*>Budget\s*\s*(.*)(?:\(estimated\))\s*~Ui'; + const IMDB_BUDGET = '~]*>Budget<\/td>\s*\s*(.*)(?:\(estimated\))\s*<\/td>~Ui'; const IMDB_CAST = '~]*itemprop="actor"[^>]*>\s*]*>\s*(.+)~Ui'; const IMDB_RATING = '~class="ipl-rating-star__rating">(.*)<~Ui'; const IMDB_RATING_COUNT = '~class="ipl-rating-star__total-votes">\((.*)\)<~Ui'; const IMDB_RELEASE_DATE = '~href="/title/[t0-9]*/releaseinfo">(.*)<~Ui'; const IMDB_RUNTIME = '~]*>\s*Runtime\s*\s*(.+)~Ui'; - const IMDB_SEARCH = '~