.buildpath
.project
.settings/
+.idea
--- /dev/null
+https://www.imdb.com/interfaces/
+
+Subsets of IMDb data are available for access to customers for personal and non-commercial use.
+You can hold local copies of this data, and it is subject to our terms and conditions.
+Please refer to the Non-Commercial Licensing
+https://help.imdb.com/article/imdb/general-information/can-i-use-imdb-data-in-my-software/G5JTRESSHJBBHTGX
+and copyright/license and verify compliance.
+https://www.imdb.com/conditions
+
+This will import the IMDb dataset TSV files into your MySQL database for further use.
+Based on the dataset format as of February 2020.
+
+As of March 2020:
+The title crew file looks strange. Its longest line is 16313 characters (wc -L title.crew.tsv),
+therefore the columns directors and writers are defined as text and not
+varchar. I do not know whether this is an error in the data or correct...
+
+
+This is not a good example of something that should be written in PHP, but you can use it.
+Don't execute it through a web server. It is a CLI script.
\ No newline at end of file
--- /dev/null
+*
+!.gitignore
--- /dev/null
+<?php
+/**
+ * dolphin. Collection of useful PHP skeletons.
+ * Copyright (C) 2013-2020 Johannes 'Banana' Keßler
+ *
+ * https://www.bananas-playground.net
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
+ *
+ * You should have received a copy of the
+ * COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
+ * along with this program. If not, see http://www.sun.com/cddl/cddl.html
+ */
+
+/**
+ * Read the TSV data from IMDb and create and fill the matching MySQL tables.
+ * The dataset format is based on the state of February 2020.
+ */
+
+mb_http_output('UTF-8');
+mb_internal_encoding('UTF-8');
+ini_set('error_reporting',-1); // E_ALL & E_STRICT
+date_default_timezone_set('Europe/Berlin');
+
+## files located in the datasets/ directory
+## key   = name of the import class, expected in lib/<key>.class.php
+## value = name of the TSV file inside datasets/
+## Both directories are resolved relative to the current working directory.
+$filesToImport = array(
+    'TitleAkas' => 'title.akas.tsv',
+    'TitleBasics' => 'title.basics.tsv',
+    'TitleCrew' => 'title.crew.tsv',
+    'TitleEpisode' => 'title.episode.tsv',
+    'TitlePrincipals' => 'title.principals.tsv',
+    'TitleRatings' => 'title.ratings.tsv',
+    'NameBasics' => 'name.basics.tsv'
+);
+
+## database settings
+define('DB_HOST','localhost');
+define('DB_USER','user');
+define('DB_PASSWORD','test');
+define('DB_NAME','imdb');
+
+## DB connection
+# Enable exceptions for mysqli errors BEFORE connecting, so a failed
+# connection is reported the same way on every PHP version and can be
+# caught here instead of ending in an uncaught exception.
+mysqli_report(MYSQLI_REPORT_ERROR | MYSQLI_REPORT_STRICT);
+try {
+    $DB = new mysqli(DB_HOST, DB_USER,DB_PASSWORD, DB_NAME);
+}
+catch (mysqli_sql_exception $e) {
+    exit("Can not connect to MySQL Server\n");
+}
+# kept as a second guard in case the connection failed without an exception
+if ($DB->connect_errno) exit("Can not connect to MySQL Server\n");
+$DB->set_charset("utf8mb4");
+$DB->query("SET collation_connection = 'utf8mb4_bin'");
+
+require_once 'lib/import.abstract.class.php';
+
+## run every configured import; a missing data file or import class is
+## reported and skipped, the remaining imports still run
+foreach($filesToImport as $key=>$file) {
+    $classFile = $key.'.class.php';
+    $file = 'datasets/'.$file;
+    if(file_exists($file) && is_readable($file) && file_exists('lib/'.$classFile)) {
+        require_once 'lib/'.$classFile;
+        // the array key doubles as the class name
+        $obj = new $key($DB);
+        $obj->import($file);
+    }
+    else {
+        echo "Required file $file or import class $key not found\n";
+    }
+}
--- /dev/null
+<?php
+/**
+ * dolphin. Collection of useful PHP skeletons.
+ * Copyright (C) 2013-2020 Johannes 'Banana' Keßler
+ *
+ * https://www.bananas-playground.net
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
+ *
+ * You should have received a copy of the
+ * COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
+ * along with this program. If not, see http://www.sun.com/cddl/cddl.html
+ */
+
+/**
+ * Class NameBasics
+ * Import the data from imdb dataset name.basics.tsv
+ */
+class NameBasics extends TSVImport {
+    /**
+     * Defines the table name and the CREATE TABLE statement for name_basics.
+     *
+     * birthYear and deathYear are stored as char(4), the same way
+     * title_basics stores startYear/endYear. The MySQL YEAR type only covers
+     * 1901 to 2155, so earlier years would be lost when inserted with
+     * INSERT IGNORE.
+     * An already existing name_basics table is not altered by this import.
+     *
+     * @inheritDoc
+     */
+    public function setup() {
+        $this->_db_table_name = 'name_basics';
+        $this->_db_table_crate_str = "CREATE TABLE `".$this->_db_table_name."` (
+`nconst` varchar(16) COLLATE utf8mb4_bin NOT NULL,
+`primaryName` varchar(128) COLLATE utf8mb4_bin NOT NULL,
+`birthYear` char(4) COLLATE utf8mb4_bin NOT NULL,
+`deathYear` char(4) COLLATE utf8mb4_bin NOT NULL,
+`primaryProfession` text COLLATE utf8mb4_bin NOT NULL,
+`knownForTitles` text COLLATE utf8mb4_bin NOT NULL,
+UNIQUE KEY `nconst` (`nconst`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin";
+    }
+
+    /**
+     * Builds the "(v0, v1, ..., v5)" tuple for one line of name.basics.tsv.
+     * Every value is escaped with the database connection.
+     *
+     * @param array $data the fields of one TSV line
+     * @return string the value tuple, or an empty string if the line has
+     *                fewer than six fields
+     * @inheritDoc
+     */
+    public function queryValuePart($data) {
+        $ret = '';
+
+        if(!empty($data)) {
+            // incomplete line, skip it
+            if(!isset($data[5])) {
+                return $ret;
+            }
+
+            $ret .= "(
+ '".$this->_DB->real_escape_string($data[0])."',
+ '".$this->_DB->real_escape_string($data[1])."',
+ '".$this->_DB->real_escape_string($data[2])."',
+ '".$this->_DB->real_escape_string($data[3])."',
+ '".$this->_DB->real_escape_string($data[4])."',
+ '".$this->_DB->real_escape_string($data[5])."'
+ )";
+        }
+
+        return $ret;
+    }
+}
\ No newline at end of file
--- /dev/null
+<?php
+/**
+ * dolphin. Collection of useful PHP skeletons.
+ * Copyright (C) 2013-2020 Johannes 'Banana' Keßler
+ *
+ * https://www.bananas-playground.net
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
+ *
+ * You should have received a copy of the
+ * COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
+ * along with this program. If not, see http://www.sun.com/cddl/cddl.html
+ */
+
+/**
+ * Class TitleAkas
+ * Import the data from imdb dataset title.akas.tsv
+ */
+class TitleAkas extends TSVImport {
+
+    /**
+     * Sets the table name and the CREATE TABLE statement for title_akas.
+     *
+     * @inheritDoc
+     */
+    public function setup() {
+        $table = 'title_akas';
+        $this->_db_table_name = $table;
+        $this->_db_table_crate_str = "CREATE TABLE `{$table}` (
+`titleId` varchar(16) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
+`ordering` int NOT NULL,
+`title` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
+`region` varchar(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
+`language` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
+`types` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
+`attributes` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
+`isOriginalTitle` tinyint(1) NOT NULL,
+UNIQUE KEY `titleId` (`titleId`,`ordering`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin";
+    }
+
+    /**
+     * Turns one line of title.akas.tsv into an escaped SQL value tuple.
+     *
+     * @param array $data the fields of one TSV line
+     * @return string the value tuple, or an empty string if the line is
+     *                empty or has fewer than eight fields
+     * @inheritDoc
+     */
+    public function queryValuePart($data) {
+        // nothing usable in this line
+        if(empty($data) || !isset($data[7])) {
+            return '';
+        }
+
+        $db = $this->_DB;
+
+        return "(
+ '".$db->real_escape_string($data[0])."',
+ '".$db->real_escape_string($data[1])."',
+ '".$db->real_escape_string($data[2])."',
+ '".$db->real_escape_string($data[3])."',
+ '".$db->real_escape_string($data[4])."',
+ '".$db->real_escape_string($data[5])."',
+ '".$db->real_escape_string($data[6])."',
+ '".$db->real_escape_string($data[7])."'
+ )";
+    }
+}
\ No newline at end of file
--- /dev/null
+<?php
+/**
+ * dolphin. Collection of useful PHP skeletons.
+ * Copyright (C) 2013-2020 Johannes 'Banana' Keßler
+ *
+ * https://www.bananas-playground.net
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
+ *
+ * You should have received a copy of the
+ * COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
+ * along with this program. If not, see http://www.sun.com/cddl/cddl.html
+ */
+
+/**
+ * Class TitleBasics
+ * Import the data from imdb dataset title.basics.tsv
+ */
+class TitleBasics extends TSVImport {
+
+    /**
+     * Sets the table name and the CREATE TABLE statement for title_basics.
+     *
+     * @inheritDoc
+     */
+    public function setup() {
+        $table = 'title_basics';
+        $this->_db_table_name = $table;
+        $this->_db_table_crate_str = "CREATE TABLE `{$table}` (
+`tconst` varchar(16) COLLATE utf8mb4_bin NOT NULL,
+`titleType` varchar(16) COLLATE utf8mb4_bin NOT NULL,
+`primaryTitle` varchar(255) COLLATE utf8mb4_bin NOT NULL,
+`originalTitle` varchar(255) COLLATE utf8mb4_bin NOT NULL,
+`isAdult` tinyint(1) NOT NULL,
+`startYear` char(4) COLLATE utf8mb4_bin NOT NULL,
+`endYear` char(4) COLLATE utf8mb4_bin NOT NULL,
+`runtimeMinutes` int NOT NULL,
+`genres` varchar(255) COLLATE utf8mb4_bin NOT NULL,
+UNIQUE KEY `tconst` (`tconst`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin";
+    }
+
+    /**
+     * Turns one line of title.basics.tsv into an escaped SQL value tuple.
+     *
+     * @param array $data the fields of one TSV line
+     * @return string the value tuple, or an empty string if the line is
+     *                empty or has fewer than nine fields
+     * @inheritDoc
+     */
+    public function queryValuePart($data) {
+        // nothing usable in this line
+        if(empty($data) || !isset($data[8])) {
+            return '';
+        }
+
+        $db = $this->_DB;
+
+        return "(
+ '".$db->real_escape_string($data[0])."',
+ '".$db->real_escape_string($data[1])."',
+ '".$db->real_escape_string($data[2])."',
+ '".$db->real_escape_string($data[3])."',
+ '".$db->real_escape_string($data[4])."',
+ '".$db->real_escape_string($data[5])."',
+ '".$db->real_escape_string($data[6])."',
+ '".$db->real_escape_string($data[7])."',
+ '".$db->real_escape_string($data[8])."'
+ )";
+    }
+}
--- /dev/null
+<?php
+/**
+ * dolphin. Collection of useful PHP skeletons.
+ * Copyright (C) 2013-2020 Johannes 'Banana' Keßler
+ *
+ * https://www.bananas-playground.net
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
+ *
+ * You should have received a copy of the
+ * COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
+ * along with this program. If not, see http://www.sun.com/cddl/cddl.html
+ */
+
+/**
+ * Class TitleCrew
+ * Import the data from imdb dataset title.crew.tsv
+ */
+class TitleCrew extends TSVImport {
+    /**
+     * Sets the table name and the CREATE TABLE statement for title_crew.
+     * directors and writers are text columns.
+     *
+     * @inheritDoc
+     */
+    public function setup() {
+        $table = 'title_crew';
+        $this->_db_table_name = $table;
+        $this->_db_table_crate_str = "CREATE TABLE `{$table}` (
+`tconst` varchar(16) COLLATE utf8mb4_bin NOT NULL,
+`directors` text CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
+`writers` text CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
+UNIQUE KEY `tconst` (`tconst`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin";
+    }
+
+    /**
+     * Turns one line of title.crew.tsv into an escaped SQL value tuple.
+     *
+     * @param array $data the fields of one TSV line
+     * @return string the value tuple, or an empty string if the line is
+     *                empty or has fewer than three fields
+     * @inheritDoc
+     */
+    public function queryValuePart($data) {
+        // nothing usable in this line
+        if(empty($data) || !isset($data[2])) {
+            return '';
+        }
+
+        $db = $this->_DB;
+
+        return "(
+ '".$db->real_escape_string($data[0])."',
+ '".$db->real_escape_string($data[1])."',
+ '".$db->real_escape_string($data[2])."'
+ )";
+    }
+}
\ No newline at end of file
--- /dev/null
+<?php
+/**
+* dolphin. Collection of useful PHP skeletons.
+* Copyright (C) 2013-2020 Johannes 'Banana' Keßler
+*
+* https://www.bananas-playground.net
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
+*
+* You should have received a copy of the
+* COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
+* along with this program. If not, see http://www.sun.com/cddl/cddl.html
+*/
+
+/**
+ * Class TitleEpisode
+ * Import the data from imdb dataset title.episode.tsv
+ */
+class TitleEpisode extends TSVImport {
+    /**
+     * Sets the table name and the CREATE TABLE statement for title_episode.
+     *
+     * @inheritDoc
+     */
+    public function setup() {
+        $table = 'title_episode';
+        $this->_db_table_name = $table;
+        $this->_db_table_crate_str = "CREATE TABLE `{$table}` (
+`tconst` varchar(16) COLLATE utf8mb4_bin NOT NULL,
+`parentTconst` varchar(16) COLLATE utf8mb4_bin NOT NULL,
+`seasonNumber` int NOT NULL,
+`episodeNumber` int NOT NULL,
+UNIQUE KEY `tconst` (`tconst`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin";
+    }
+
+    /**
+     * Turns one line of title.episode.tsv into an escaped SQL value tuple.
+     *
+     * @param array $data the fields of one TSV line
+     * @return string the value tuple, or an empty string if the line is
+     *                empty or has fewer than four fields
+     * @inheritDoc
+     */
+    public function queryValuePart($data) {
+        // nothing usable in this line
+        if(empty($data) || !isset($data[3])) {
+            return '';
+        }
+
+        $db = $this->_DB;
+
+        return "(
+ '".$db->real_escape_string($data[0])."',
+ '".$db->real_escape_string($data[1])."',
+ '".$db->real_escape_string($data[2])."',
+ '".$db->real_escape_string($data[3])."'
+ )";
+    }
+}
\ No newline at end of file
--- /dev/null
+<?php
+/**
+* dolphin. Collection of useful PHP skeletons.
+* Copyright (C) 2013-2020 Johannes 'Banana' Keßler
+*
+* https://www.bananas-playground.net
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
+*
+* You should have received a copy of the
+* COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
+* along with this program. If not, see http://www.sun.com/cddl/cddl.html
+*/
+
+/**
+ * Class TitlePrincipals
+ * Import the data from imdb dataset title.principals.tsv
+ */
+class TitlePrincipals extends TSVImport {
+    /**
+     * Sets the table name and the CREATE TABLE statement for title_principals.
+     *
+     * NOTE(review): job and characters are limited to 128 characters here;
+     * verify against the real data that no value is longer.
+     *
+     * @inheritDoc
+     */
+    public function setup() {
+        $table = 'title_principals';
+        $this->_db_table_name = $table;
+        $this->_db_table_crate_str = "CREATE TABLE `{$table}` (
+`tconst` varchar(16) COLLATE utf8mb4_bin NOT NULL,
+`ordering` int NOT NULL,
+`nconst` varchar(16) COLLATE utf8mb4_bin NOT NULL,
+`category` varchar(128) COLLATE utf8mb4_bin NOT NULL,
+`job` varchar(128) COLLATE utf8mb4_bin NOT NULL,
+`characters` varchar(128) COLLATE utf8mb4_bin NOT NULL,
+UNIQUE KEY `tconst` (`tconst`,`ordering`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin";
+    }
+
+    /**
+     * Turns one line of title.principals.tsv into an escaped SQL value tuple.
+     *
+     * @param array $data the fields of one TSV line
+     * @return string the value tuple, or an empty string if the line is
+     *                empty or has fewer than six fields
+     * @inheritDoc
+     */
+    public function queryValuePart($data) {
+        // nothing usable in this line
+        if(empty($data) || !isset($data[5])) {
+            return '';
+        }
+
+        $db = $this->_DB;
+
+        return "(
+ '".$db->real_escape_string($data[0])."',
+ '".$db->real_escape_string($data[1])."',
+ '".$db->real_escape_string($data[2])."',
+ '".$db->real_escape_string($data[3])."',
+ '".$db->real_escape_string($data[4])."',
+ '".$db->real_escape_string($data[5])."'
+ )";
+    }
+}
\ No newline at end of file
--- /dev/null
+<?php
+/**
+ * dolphin. Collection of useful PHP skeletons.
+ * Copyright (C) 2013-2020 Johannes 'Banana' Keßler
+ *
+ * https://www.bananas-playground.net
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
+ *
+ * You should have received a copy of the
+ * COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
+ * along with this program. If not, see http://www.sun.com/cddl/cddl.html
+ */
+
+/**
+ * Class TitleRatings
+ * Import the data from imdb dataset title.ratings.tsv
+ */
+class TitleRatings extends TSVImport {
+    /**
+     * Sets the table name and the CREATE TABLE statement for title_ratings.
+     * averageRating is kept as a short string column.
+     *
+     * @inheritDoc
+     */
+    public function setup() {
+        $table = 'title_ratings';
+        $this->_db_table_name = $table;
+        $this->_db_table_crate_str = "CREATE TABLE `{$table}` (
+`tconst` varchar(16) COLLATE utf8mb4_bin NOT NULL,
+`averageRating` varchar(8) COLLATE utf8mb4_bin NOT NULL,
+`numVotes` int NOT NULL,
+UNIQUE KEY `tconst` (`tconst`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin";
+    }
+
+    /**
+     * Turns one line of title.ratings.tsv into an escaped SQL value tuple.
+     *
+     * @param array $data the fields of one TSV line
+     * @return string the value tuple, or an empty string if the line is
+     *                empty or has fewer than three fields
+     * @inheritDoc
+     */
+    public function queryValuePart($data) {
+        // nothing usable in this line
+        if(empty($data) || !isset($data[2])) {
+            return '';
+        }
+
+        $db = $this->_DB;
+
+        return "(
+ '".$db->real_escape_string($data[0])."',
+ '".$db->real_escape_string($data[1])."',
+ '".$db->real_escape_string($data[2])."'
+ )";
+    }
+}
--- /dev/null
+<?php
+/**
+ * dolphin. Collection of useful PHP skeletons.
+ * Copyright (C) 2013-2020 Johannes 'Banana' Keßler
+ *
+ * https://www.bananas-playground.net
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
+ *
+ * You should have received a copy of the
+ * COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
+ * along with this program. If not, see http://www.sun.com/cddl/cddl.html
+ */
+
+/**
+ * base class with needed methods to make an import
+ * Class TSVImport
+ */
+abstract class TSVImport {
+
+    /**
+     * @var mysqli $_DB database connection used for all queries
+     */
+    protected $_DB;
+
+    /**
+     * @var string $_db_table_name name of the table the data is imported into
+     */
+    protected $_db_table_name;
+
+    /**
+     * @var string $_db_table_crate_str CREATE TABLE statement for this table
+     */
+    protected $_db_table_crate_str;
+
+    /**
+     * TSVImport constructor.
+     * Stores the connection and lets the concrete class define its table.
+     *
+     * @param $db Mysqli database object
+     */
+    public function __construct($db) {
+        $this->_DB = $db;
+        $this->setup();
+    }
+
+    /**
+     * Set the table name into $_db_table_name
+     * Set the CREATE TABLE query into $_db_table_crate_str with the use
+     * of $_db_table_name
+     *
+     * @return void
+     */
+    abstract public function setup();
+
+    /**
+     * Creates the values part of the insert query
+     * INSERT INTO ... VALUES (1,2,3),(1,2,3),(1,2,3)
+     * Where (1,2,3) is the result of this function
+     *
+     * @param $data array of the tsv line to build the ()
+     * for the values insert query
+     * @return string the value tuple or an empty string if the line
+     *                should be skipped
+     */
+    abstract public function queryValuePart($data);
+
+    /**
+     * Imports the given TSV file into the table of this import class.
+     *
+     * The table is created first if it does not exist. The first line of the
+     * file is skipped as the header. All other lines are split at the tab
+     * character and inserted in batches with INSERT IGNORE.
+     *
+     * The lines are split with explode() and not with fgetcsv(). fgetcsv()
+     * treats a double quote as field enclosure, which can merge fields or
+     * lines when the data contains plain, unescaped double quotes.
+     *
+     * If an insert query fails the error and the current batch are printed
+     * and the script is terminated with exit status 1.
+     *
+     * @param $file The TSV file to import
+     * @return bool true if the file was processed, false if the table could
+     *              not be created, the filename is empty or the file could
+     *              not be opened
+     */
+    public function import($file) {
+        echo "Starting to import $file with ".get_class($this)." class\n";
+
+        if($this->_checkTable() == false) {
+            echo "Creating needed database table: $this->_db_table_name \n";
+            if($this->_createTable() == false) {
+                // without the table every following insert would fail
+                echo "Could not create database table: $this->_db_table_name \n";
+                return false;
+            }
+        }
+        else {
+            echo "Database table already exists: $this->_db_table_name \n";
+        }
+
+        if(empty($file)) {
+            echo "Filename empty\n";
+            return false;
+        }
+
+        $handle = fopen($file, "r");
+        if($handle === false) {
+            return false;
+        }
+
+        // skip first line as it should be column names
+        fgets($handle);
+
+        // only used for the progress output
+        $linesInFile = $this->_linesInFile($file);
+
+        $queryStrStart = "INSERT IGNORE INTO `".$this->_db_table_name."` VALUES ";
+        $queryStr = '';
+        $total = 0;
+        // some files have very long lines, so no length limit is used
+        while (($line = fgets($handle)) !== false) {
+            $data = explode("\t", rtrim($line, "\r\n"));
+
+            // an empty result marks an invalid line
+            $_p = $this->queryValuePart($data);
+            if(empty($_p)) {
+                continue;
+            }
+
+            $queryStr .= $_p.",";
+            $total++;
+
+            // send the batch as soon as it is longer than 1000000 bytes
+            if(isset($queryStr[1000000])) {
+                $this->_insertBatch($queryStrStart, $queryStr);
+                $queryStr = '';
+            }
+
+            echo "Inserting: $total/$linesInFile\r";
+        }
+
+        // the remaining rows which did not fill a complete batch
+        if(!empty($queryStr)) {
+            $this->_insertBatch($queryStrStart, $queryStr);
+            echo "\n";
+        }
+
+        fclose($handle);
+        echo "Import complete. Inserted $total rows\n";
+
+        return true;
+    }
+
+    /**
+     * Executes one batch insert query.
+     * On failure the error message and the batch are printed and the script
+     * is terminated with exit status 1.
+     *
+     * @param string $queryStrStart the INSERT ... VALUES part of the query
+     * @param string $queryStr the value tuples, each followed by a comma
+     * @return void
+     */
+    protected function _insertBatch($queryStrStart, $queryStr) {
+        try {
+            // remove the trailing comma of the last tuple
+            $this->_DB->query($queryStrStart.rtrim($queryStr, ","));
+        }
+        catch (Exception $e) {
+            echo "Failure in executing the query. ".$e->getMessage()."\n";
+            var_dump($queryStr);
+            exit(1);
+        }
+    }
+
+    /**
+     * Check if the needed DB table is already there.
+     * Looks the table up in information_schema, using the DB_NAME constant
+     * as schema name.
+     *
+     * @return bool true if the table exists, false if not or if the lookup
+     *              failed
+     */
+    protected function _checkTable() {
+        $ret = false;
+
+        $queryStr = "SELECT count(*) AS amount
+ FROM information_schema.TABLES
+ WHERE (TABLE_SCHEMA = '".DB_NAME."') AND (TABLE_NAME = '".$this->_db_table_name."')";
+        try {
+            $query = $this->_DB->query($queryStr);
+            // query() returns false if it fails without throwing
+            if($query) {
+                $result = $query->fetch_assoc();
+                if(!empty($result['amount'])) {
+                    $ret = true;
+                }
+            }
+        }
+        catch (Exception $e) {
+            echo $e->getMessage();
+        }
+
+        return $ret;
+    }
+
+    /**
+     * Creates the needed table for this import
+     * If there are any changes to the table you need to
+     * alter the insert queries too
+     *
+     * @return bool true if the table was created, false otherwise
+     */
+    protected function _createTable() {
+        $ret = false;
+
+        try {
+            $query = $this->_DB->query($this->_db_table_crate_str);
+            if($query) {
+                $ret = true;
+            }
+        }
+        catch (Exception $e) {
+            echo $e->getMessage();
+        }
+
+        return $ret;
+    }
+
+    /**
+     * Count the file lines.
+     * Used for user info
+     *
+     * @param $file
+     * @return int
+     */
+    protected function _linesInFile($file) {
+        $file = new \SplFileObject($file, 'r');
+        // seeking far behind the end leaves the position at the last line
+        $file->seek(PHP_INT_MAX);
+        return $file->key();
+    }
+
+}
\ No newline at end of file