googlebookparser.class.php 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. <?php
  2. /**
  3. * Bibliotheca
  4. *
  5. * Copyright 2018-2023 Johannes Keßler
  6. *
  7. * This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program. If not, see http://www.gnu.org/licenses/gpl-3.0.
  19. */
  20. /**
  21. * Class GoogleBooks
  22. *
  23. * Search for book information with google books
  24. *
  25. * https://developers.google.com/books/docs/overview
  26. *
  27. * possible alternative if google does limit the access: https://openlibrary.org/dev/docs/api/books
  28. */
  29. class GoogleBooks {
  30. /**
  31. * @var String The google api endpoint
  32. */
  33. private string $_VOLUMES_ENDPOINT = 'https://www.googleapis.com/books/v1/volumes';
  34. /**
  35. * @var bool DEBUG
  36. */
  37. private bool $_DEBUG = false;
  38. /**
  39. * @var string The user agent used to make curl calls
  40. */
  41. private string $_BROWSER_AGENT = '';
  42. /**
  43. * @var string The user agent lang used to make curl calls
  44. */
  45. private string $_BROWSER_LANG = '';
  46. /**
  47. * @var string The user agent accept used to make curl calls
  48. */
  49. private string $_BROWSER_ACCEPT = '';
  50. public function __construct(array $options) {
  51. if(isset($options['debug']) && !empty($options['debug'])) {
  52. $this->_DEBUG = true;
  53. }
  54. $this->_BROWSER_AGENT = $options['browserAgent'];
  55. $this->_BROWSER_LANG = $options['browserLang'];
  56. $this->_BROWSER_ACCEPT = $options['browserAccept'];
  57. }
  58. /**
  59. * Use a given ISBN and query the google books API with it.
  60. * https://developers.google.com/books/docs/overview
  61. * for example: https://www.googleapis.com/books/v1/volumes?q=isbn:9780812972153
  62. */
  63. public function searchForISBN(string $isbn) : array {
  64. $data = array();
  65. if(!empty($isbn)) {
  66. $isbn = urlencode($isbn);
  67. $url = $this->_VOLUMES_ENDPOINT;
  68. $url .= '?q=isbn:'.$isbn;
  69. if(DEBUG) Summoner::sysLog("[DEBUG] ".__METHOD__." isbn query url: $url");
  70. $do = $this->_curlCall($url);
  71. if(!empty($do)) {
  72. $data = json_decode($do, true);
  73. if(!empty($data)) {
  74. if(DEBUG) Summoner::sysLog("[DEBUG] ".__METHOD__." isbn json data:".Summoner::cleanForLog($data));
  75. $data = $this->_buildDataFromISBNsearch($data);
  76. }
  77. else {
  78. Summoner::sysLog("[ERROR] ".__METHOD__." invalid isbn json data:".Summoner::cleanForLog($do));
  79. }
  80. }
  81. }
  82. return $data;
  83. }
  84. /**
  85. * Download given URL to a tmp file
  86. * make sure to remove the tmp file after use
  87. *
  88. * @param string $url
  89. * @return string
  90. */
  91. public function downloadCover(string $url): string {
  92. $ret = '';
  93. // replace zoom=1 with zoom=0 or even remove to get the full picture
  94. // http://books.google.com/books/content?id=yyaxyKjyp2YC&printsec=frontcover&img=1&zoom=1&source=gbs_api
  95. $url = str_replace("zoom=1", "zoom=0",$url);
  96. $_tmpFile = tempnam(sys_get_temp_dir(), "bibliotheca-");
  97. $fh = fopen($_tmpFile,"w+");
  98. if($this->_DEBUG) {
  99. Summoner::sysLog('[DEBUG] '.__METHOD__.' url '.Summoner::cleanForLog($url));
  100. }
  101. if($fh !== false) {
  102. // modified curl call for fetching an image
  103. $ch = curl_init($url);
  104. curl_setopt($ch, CURLOPT_FILE, $fh);
  105. curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
  106. curl_setopt($ch, CURLOPT_TIMEOUT, 30);
  107. curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
  108. curl_setopt($ch, CURLOPT_MAXREDIRS, 3);
  109. curl_setopt($ch, CURLOPT_USERAGENT, $this->_BROWSER_AGENT);
  110. curl_exec($ch);
  111. curl_close($ch);
  112. $ret = $_tmpFile;
  113. }
  114. fclose($fh);
  115. return $ret;
  116. }
  117. /**
  118. * Use the given isb search data and build a nice return array
  119. * Since the search is for a isbn, there should be only one result
  120. *
  121. * @param array $rawData
  122. * @return array
  123. */
  124. private function _buildDataFromISBNsearch(array $rawData) : array {
  125. $data = array();
  126. if(!empty($rawData) && isset($rawData['items'][0]['volumeInfo'])) {
  127. $_d = $rawData['items'][0]['volumeInfo'];
  128. $data['title'] = $_d['title'] ?? '';
  129. $data['subtitle'] = $_d['subtitle'] ?? '';
  130. $data['publisher'] = $_d['publisher'] ?? '';
  131. $data['publishedDate'] = $_d['publishedDate'] ?? '';
  132. $data['description'] = $_d['description'] ?? '';
  133. $data['authors'] = isset($_d['authors']) ? implode(",", $_d['authors']) : '';
  134. $data['categories'] = isset($_d['categories']) ? implode(",", $_d['categories']) : '';
  135. $data['cover'] = $_d['imageLinks']['thumbnail'] ?? '';
  136. $data['isbn'] = '';
  137. if(isset($_d['industryIdentifiers']) && is_array($_d['industryIdentifiers'])) {
  138. foreach($_d['industryIdentifiers'] as $k=>$v) {
  139. if($v['type'] == "ISBN_13") {
  140. $data['isbn'] = $v['identifier'];
  141. }
  142. }
  143. }
  144. }
  145. return $data;
  146. }
  147. /**
  148. * execute a curl call to the given $url
  149. *
  150. * @param string $url The request url
  151. * @return string
  152. */
  153. private function _curlCall(string $url): string {
  154. $ret = '';
  155. $ch = curl_init();
  156. curl_setopt($ch, CURLOPT_URL, $url);
  157. curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
  158. curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
  159. curl_setopt($ch, CURLOPT_TIMEOUT, 30);
  160. curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
  161. curl_setopt($ch, CURLOPT_MAXREDIRS, 2);
  162. curl_setopt($ch, CURLOPT_USERAGENT, $this->_BROWSER_AGENT);
  163. curl_setopt($ch, CURLOPT_HTTPHEADER, array(
  164. 'Accept: '.$this->_BROWSER_ACCEPT,
  165. 'Accept-Language: '.$this->_BROWSER_LANG)
  166. );
  167. $_headers = array();
  168. if($this->_DEBUG) {
  169. curl_setopt($ch, CURLOPT_VERBOSE, true);
  170. curl_setopt($ch, CURLOPT_HEADERFUNCTION,
  171. function($curl, $header) use (&$_headers) {
  172. $len = strlen($header);
  173. $header = explode(':', $header, 2);
  174. if (count($header) < 2) { // ignore invalid headers
  175. return $len;
  176. }
  177. $_headers[strtolower(trim($header[0]))][] = trim($header[1]);
  178. return $len;
  179. }
  180. );
  181. }
  182. $do = curl_exec($ch);
  183. if(is_string($do) === true) {
  184. $ret = $do;
  185. }
  186. curl_close($ch);
  187. if($this->_DEBUG) {
  188. Summoner::sysLog('[DEBUG] '.__METHOD__.' headers '.Summoner::cleanForLog($_headers));
  189. }
  190. return $ret;
  191. }
  192. }