Version: 1
Type: Full Script
Category: Databases
License: GNU General Public License
Description: IMDB-Scraper lists a directory and runs the filename through IMDB, parses the content, and writes it to a file to be inserted into MySQL. Also it downloads the large cover poster.
Ok, so what this does is ..
(1) Opens a directory, lists all the movie-type files: 'mkv, mp4, mpg, avi'
(2) Submits each file name to Google as an 'I'm Feeling Lucky' search to reach the matching IMDb page
(3) Downloads all the information, parses it, and stores it in a 'data' file
(4) Copies the large poster-image to a 'temp' folder
I'm sure you can figure it out
Note: The last character of the last data line in data.txt will be a comma (,); it must be changed to a semicolon (;) before the statement can be run.
You can extend the script to append the semicolon automatically, or simply change it by hand.
This script takes the tedious work out of storing your movie collection in a database, but I prefer to do the insertion into the database manually to ensure fewer errors.
I didn't write the IMDB-Scraper (don't really know who did). I modified some parts of it, and made it automatic (90%, with the exception of the insert and the semicolon).
If anybody wishes to make those corrections :>
Anyways...
Create a database: moviedb
>Create a table: movies
>Then create new fields:
<-> movie_id ::: Random 9-10 digit id
<-> movie_fn ::: name of file located on the harddrive
<-> movie_path ::: Path where the file is located on the harddrive
<-> movie_title ::: Film Title
<-> movie_date ::: Release date: Day, Month, Year
<-> movie_actors ::: Cast members
<-> movie_about ::: Short description..
<-> movie_length ::: Length in minutes..
<-> movie_size ::: Size of the file on the disk..
<-> movie_rating ::: MPAA rating..
<-> movie_plot ::: This is the Synopsis field..
<-> movie_type ::: This is your Genres
<-> movie_when ::: This is for 'recently' added stuff.. The more recent, the closer to the top it is..
<-> movie_quality ::: A rough guess that labels mkv/mp4 files as HD and avi/mpg files as SD -- only accurate if you know your AVIs are SD and your MKVs are HD, and so on..
#!/usr/local/bin/php -q
<?php
// ---------------------------------------------------------------------------
// Driver: scans $path for movie files, skips the ones that are already in the
// `movies` table and hands every other file to MediaInfo::getMovieInfo().
//
// The variables $path, $cover_path, $data_f, $ext and $sqlfn are read as
// globals by MediaInfo::getMovieInfo(), so their names must not change.
// ---------------------------------------------------------------------------

// Directory that holds the movie files; data.txt is written here as well.
$path = "/movies/movies";
// Directory that receives the downloaded posters.
$cover_path = "$path/temp";
// File that collects the generated INSERT statement.
$data_f = "$path/data.txt";

$dbhost = 'localhost';
$dbuser = 'YOUR_SQL_USERNAME';
$dbpass = 'YOUR_SQL_PASSWORD';
$dbname = 'moviedb';
$conn = mysql_connect($dbhost, $dbuser, $dbpass) or die("WTF I CANT CONNECT!!\n");
mysql_select_db($dbname, $conn) or die(mysql_error());

// Start from a clean slate: drop the previous data file and the posters that
// were downloaded together with it.
if (file_exists($data_f)) {
    // NOTE(review): the one-second pause is kept from the original script;
    // its purpose is not evident from this file.
    sleep(1);
    unlink($data_f);
    $old_covers = glob("$cover_path/*.jpg");
    foreach ($old_covers ? $old_covers : array() as $old_cover) {
        unlink($old_cover);
    }
}

$dir_handle = @opendir($path) or die("Unable to open $path");

// Write the statement header once; each movie appends one VALUES row later.
$list = fopen($data_f, 'a') or die("Unable to open $data_f");
fwrite($list, "INSERT INTO `moviedb`.`movies` (`movie_id` , `movie_fn` , `movie_path` , `movie_title` , `movie_date` , `movie_actors` , `movie_about` , `movie_length` , `movie_size` , `movie_rating` , `movie_plot` , `movie_type`, `movie_when`, `movie_quality`) VALUES\n");
fclose($list);

$movie_exts = array('avi', 'mp4', 'mpg', 'mkv');

// Compare against false explicitly: a file literally named "0" is falsy and
// would otherwise end the scan early.
while (false !== ($file = readdir($dir_handle))) {
    if ($file == '.' || $file == '..') {
        continue;
    }

    // Use the real (last) extension so names with several dots, such as
    // "The.Matrix.1999.mkv", are recognised as well.
    $ext = strtolower(pathinfo($file, PATHINFO_EXTENSION));
    if (!in_array($ext, $movie_exts, true)) {
        //echo "Test failed: Skipping $file\n\n";
        continue;
    }

    $sqlfn = $file;

    // Turn the file name into a search phrase.
    $search = str_ireplace(array(".avi", ".mp4", ".mpg", ".mkv"), " ", $file);
    $search = str_replace(array("-", "_"), " ", $search);

    // Exact, escaped comparison: LIKE would treat "_" and "%" inside a file
    // name as wildcards, and an unescaped apostrophe would break the query.
    $fn = mysql_query("SELECT * FROM `movies` WHERE `movie_fn` = '" . mysql_real_escape_string($sqlfn, $conn) . "'", $conn) or die(mysql_error());
    $row = mysql_fetch_array($fn);
    if ($row && $row['movie_fn'] == $sqlfn) {
        // echo "Already got this one!";
        continue;
    }

    echo "Test one passed: Attempting to fetch information for title: $sqlfn...\n";
    $m = new MediaInfo();
    $info = $m->getMovieInfo($search, $sqlfn);
}
closedir($dir_handle);
/**
 * Format a byte count as a short human-readable size.
 *
 * The value may be an integer or a numeric string; surrounding whitespace is
 * ignored so that raw command output (which ends in a newline) can be passed
 * in directly. Anything that is not numeric is treated as 0 bytes.
 *
 * @param int|float|string $fs Size in bytes.
 * @return string Size rounded to two decimals with a " Gb", " Mb", " Kb" or
 *                " b" suffix.
 */
function consize($fs)
{
    // Without the trim a value such as "512\n" ends up inside the result
    // ("512\n b") and triggers non-numeric warnings in the divisions below.
    $raw = trim((string) $fs);
    $bytes = is_numeric($raw) ? $raw + 0 : 0;

    if ($bytes >= 1073741824) {
        return round($bytes / 1073741824, 2) . " Gb";
    }
    if ($bytes >= 1048576) {
        return round($bytes / 1048576, 2) . " Mb";
    }
    if ($bytes >= 1024) {
        return round($bytes / 1024, 2) . " Kb";
    }
    return $bytes . " b";
}
/**
 * Prepare a scraped text value for use inside a single-quoted SQL literal.
 *
 * HTML entities are decoded first and apostrophes are doubled afterwards, so
 * an apostrophe that arrives as an entity (for example "&#039;") is escaped
 * too. Each apostrophe is doubled exactly once.
 *
 * @param string $in Raw text, possibly containing HTML entities.
 * @return string Decoded text with every ' replaced by ''.
 */
function fixapos($in)
{
    $in = html_entity_decode($in, ENT_QUOTES, "ISO-8859-1");

    // Decode whatever numeric entities are still present. Callbacks replace
    // the removed /e modifier. Code points above 255 do not fit into a single
    // byte (chr() would wrap them into an unrelated character), so those
    // entities are left untouched.
    $in = preg_replace_callback('/&#(\d+);/m', function ($m) {
        $code = (int) $m[1];
        return $code <= 255 ? chr($code) : $m[0];
    }, $in);
    $in = preg_replace_callback('/&#x([a-f0-9]+);/mi', function ($m) {
        $code = hexdec($m[1]);
        return $code <= 255 ? chr((int) $code) : $m[0];
    }, $in);

    // Escape last, and only once.
    return str_replace("'", "''", $in);
}
/**
 * Looks up a movie on IMDb (reached through a Google "I'm Feeling Lucky"
 * search), extracts its metadata with regular expressions and, when a file
 * name is supplied, downloads the poster and appends one SQL VALUES row for
 * the movie to the data file.
 *
 * Storing a movie relies on the calling script for: the globals $cover_path,
 * $path and $data_f, an open mysql_* connection, and the global functions
 * consize() and fixapos().
 */
class MediaInfo
{
    /** @var array|null Result of the last autodetect() call. */
    public $info;

    /** @var string Poster URL derived by the last getMovieInfo() call ('' if none). */
    public $poster = '';

    /**
     * @param string|null $str Optional file name or title to look up immediately.
     */
    function __construct($str = null)
    {
        if (!is_null($str))
            $this->autodetect($str);
    }

    /**
     * Clean up a file name or title and look it up as an episode or a movie.
     *
     * @param string $str File name or title.
     * @return array The info array, also stored in $this->info.
     */
    function autodetect($str)
    {
        // Attempt to cleanup $str in case it's a filename ;-)
        $str = pathinfo($str, PATHINFO_FILENAME);
        $str = $this->normalize($str);

        // Is it a movie or tv show? ("s01e02"-style markers mean tv show.)
        if (preg_match('/s[0-9][0-9]?.?e[0-9][0-9]?/i', $str) == 1)
            $this->info = $this->getEpisodeInfo($str);
        else
            $this->info = $this->getMovieInfo($str);
        return $this->info;
    }

    /**
     * Placeholder for tv episodes: only the kind is reported.
     *
     * @param string $str Episode name (unused).
     * @return array array('kind' => 'tv')
     */
    function getEpisodeInfo($str)
    {
        $arr = array();
        $arr['kind'] = 'tv';
        return $arr;
    }

    /**
     * Fetch and parse the IMDb page for a title.
     *
     * @param string      $str  Search phrase (movie title).
     * @param string|null $file Name of the movie file inside the global $path.
     *                          When given, the poster is downloaded and a
     *                          VALUES row is appended to the global $data_f.
     *                          When omitted, the metadata is only returned.
     * @return array Parsed metadata; after a successful store it also carries
     *               'size', 'whenadd' and 'movie_quality'.
     */
    function getMovieInfo($str, $file = null)
    {
        // Drop the article to improve the search hit. The word boundary keeps
        // words that merely end in "the" (e.g. "Breathe In") intact.
        $str = preg_replace('/\bthe /i', '', $str);
        $url = "http://www.google.com/search?hl=en&q=imdb+" . urlencode($str) . "&btnI=I%27m+Feeling+Lucky";
        $html = (string) $this->geturl($url);
        // curl is not told to follow redirects, so follow the 302 by hand.
        if (stripos($html, "302 Moved") !== false)
            $html = (string) $this->geturl($this->match('/HREF="(.*?)"/ms', $html, 1));

        $arr = $this->parseMovieHtml($html);

        // NOTE(review): the poster URL is taken from the third "="-separated
        // piece of the captured markup with apostrophes removed. This depends
        // on the exact page markup the pattern captured; verify against a
        // live page.
        $parts = explode('=', $arr['poster_large']);
        $this->poster = isset($parts[2]) ? str_replace("'", "", $parts[2]) : '';

        if (!is_null($file))
            $arr = $this->storeMovie($arr, $file);
        return $arr;
    }

    /**
     * Extract the metadata fields from an IMDb title page.
     *
     * @param string $html Page source.
     * @return array Metadata; fields that were not found are false or empty.
     */
    private function parseMovieHtml($html)
    {
        $arr = array();
        $arr['kind'] = 'movie';
        $arr['id'] = $this->match('/poster.*?(tt[0-9]+)/ms', $html, 1);
        $arr['title'] = $this->match('/<title>(.*?)<\/title>/ms', $html, 1);
        $arr['title'] = preg_replace('/\([0-9]+\)/', '', $arr['title']);
        $arr['title'] = trim($arr['title']);
        $arr['rating'] = $this->match('/([0-9]\.[0-9])\/10/ms', $html, 1);
        $arr['director'] = trim(strip_tags($this->match('/Director:(.*?)<\/a>/ms', $html, 1)));
        $arr['release_date'] = $this->match('/([0-9][0-9]? (January|February|March|April|May|June|July|August|September|October|November|December) (19|20)[0-9][0-9])/ms', $html, 1);
        $arr['plot'] = trim(strip_tags($this->match('/Users:.*?<p>(.*?)(<\/p>|<a)/ms', $html, 1)));
        $arr['storyline'] = trim(strip_tags($this->match('/Storyline<\/h2>(.*?)(<em|<\/p>|<span)/ms', $html, 1)));
        $arr['runtime'] = trim($this->match('/Runtime:<\/h4>.*?([0-9]+) min.*?<\/div>/ms', $html, 1));

        $arr['genres'] = array();
        $genre_html = (string) $this->match('/Genre.?:(.*?)(<\/div>|See more)/ms', $html, 1);
        $genres = $this->match_all('/<a.*?>(.*?)<\/a>/ms', $genre_html, 1);
        // match_all() yields false on a regex error; iterate over nothing then.
        foreach ($genres ? $genres : array() as $m)
        {
            $arr['genres'][] = $m;
        }

        $arr['mpaa_rating'] = $this->match('/infobar">.<img.*?alt="(.*?)".*?>/ms', $html, 1);

        $arr['cast'] = array();
        $cast = $this->match_all('/<td class="name">(.*?)<\/td>/ms', $html, 1);
        foreach ($cast ? $cast : array() as $m)
        {
            $arr['cast'][] = trim(strip_tags($m));
        }

        $arr['poster'] = $this->match('/(http:\/\/ia.media-imdb.com\/images.*?)" \/><\/a>/ms', $html, 1);
        $arr['poster_large'] = "";
        $arr['poster_small'] = "";
        if ($arr['poster'] != '' && strrpos($arr['poster'], "nopicture") === false)
        {
            // Swap the size suffix after "_V1." for fixed heights.
            $base = substr($arr['poster'], 0, strrpos($arr['poster'], "_V1."));
            $arr['poster_large'] = $base . "_V1._SY500.jpg";
            $arr['poster_small'] = $base . "_V1._SY150.jpg";
        }
        else
        {
            $arr['poster'] = "";
        }
        return $arr;
    }

    /**
     * Download the poster and append the movie's VALUES row to the data file.
     *
     * Nothing is written when no poster URL is known or the poster cannot be
     * opened.
     *
     * @param array  $arr  Metadata from parseMovieHtml().
     * @param string $file Movie file name inside the global $path.
     * @return array $arr, extended with 'size', 'whenadd' and 'movie_quality'
     *               when the row was written.
     */
    private function storeMovie($arr, $file)
    {
        global $cover_path, $path, $data_f;

        // Reuse the id of an existing row so its poster keeps the same name.
        // Exact, escaped comparison: LIKE would treat "_" and "%" in a file
        // name as wildcards and an apostrophe would break the query.
        $fn = mysql_query("SELECT * FROM `movies` WHERE `movie_fn` = '" . mysql_real_escape_string($file) . "'") or die(mysql_error());
        $row = mysql_fetch_array($fn);
        if ($row && $row['movie_id'])
        {
            $doid = $row['movie_id'];
        }
        else
        {
            $doid = rand(600000000, 1000000000);
        }

        if (!$this->poster)
        {
            //echo "Failed: an error while fetching: $file\n";
            return $arr;
        }
        $fp = fopen($this->poster, 'r');
        if (!$fp)
        {
            //echo "Failed: an error occured when trying to open the specified url\n\n";
            return $arr;
        }

        // Save the poster as <cover_path>/<movie id>.jpg.
        $content = stream_get_contents($fp);
        fclose($fp);
        $fnp = "$cover_path/" . $doid . ".jpg";
        $fw = fopen($fnp, 'w');
        if ($fw)
        {
            fwrite($fw, $content);
            fclose($fw);
        }
        else
        {
            echo "Failed: could not write poster file $fnp\n";
        }

        // du is used rather than filesize(); the file name is shell-escaped so
        // that spaces and quotes in it are safe.
        $du = "du --block-size 1 " . escapeshellarg("$path/$file") . " | awk '{print \$1}'";
        $arr['size'] = consize(trim((string) shell_exec($du)));
        $arr['title'] = str_replace(" - IMDb", "", $arr['title']);
        $arr['whenadd'] = time();

        // At most the first three genres, separated by spaces.
        $genie = implode(" ", array_slice($arr['genres'], 0, 3));

        // Rough guess: container type decides between HD and SD.
        $ext = strtolower(pathinfo($file, PATHINFO_EXTENSION));
        if ($ext == 'mkv' or $ext == 'mp4')
        {
            $arr['movie_quality'] = 'HD';
        }
        elseif ($ext == 'avi' or $ext == 'mpg')
        {
            $arr['movie_quality'] = 'SD';
        }
        else
        {
            $arr['movie_quality'] = '?';
        }

        // Every cast member is followed by "&".
        $varx = '';
        foreach ($arr['cast'] as $actor)
        {
            $varx .= $actor . "&";
        }

        // File name and path are plain text, so only apostrophes are doubled;
        // the scraped values go through fixapos(), which escapes them itself.
        $sql_file = str_replace("'", "''", $file);
        $sql_path = str_replace("'", "''", $path);
        $sql = "('$doid' , '$sql_file' ,'$sql_path' ,'" . fixapos($arr['title']) . "' ,'" . fixapos($arr['release_date']) . "','" . fixapos($varx) . "','" . fixapos($arr['plot']) . "','" . fixapos($arr['runtime'] . " minutes") . "','" . fixapos($arr['size']) . "','" . fixapos($arr['mpaa_rating']) . "','" . fixapos($arr['storyline']) . "','" . fixapos($genie) . "','" . fixapos($arr['whenadd']) . "','" . $arr['movie_quality'] . "'),\n";

        $list = fopen($data_f, 'a') or die("Unable to open $data_f");
        fwrite($list, "$sql\n") or die("Error writing file $sql");
        echo "<-->Successfully wrote data for title: $arr[title]\n";
        fclose($list);

        return $arr;
    }

    // ****************************************************************

    /**
     * Replace underscores and dots with spaces and collapse runs of spaces.
     *
     * @param string $str
     * @return string
     */
    function normalize($str)
    {
        $str = str_replace('_', ' ', $str);
        $str = str_replace('.', ' ', $str);
        $str = preg_replace('/ +/', ' ', $str);
        return $str;
    }

    /**
     * Fetch a URL with curl, optionally sending HTTP basic credentials.
     *
     * @param string      $url
     * @param string|null $username
     * @param string|null $password
     * @return string|false Response body, or false when the request failed.
     */
    function geturl($url, $username = null, $password = null)
    {
        $ch = curl_init();
        if (!is_null($username) && !is_null($password))
            curl_setopt($ch, CURLOPT_HTTPHEADER, array('Authorization: Basic ' . base64_encode("$username:$password")));
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        $html = curl_exec($ch);
        curl_close($ch);
        return $html;
    }

    /**
     * Return capture group $i of every match of $regex in $str.
     *
     * @param string $regex
     * @param string $str
     * @param int    $i Capture group index (0 = whole match).
     * @return array|false List of captures, or false on a regex error.
     */
    function match_all($regex, $str, $i = 0)
    {
        if (preg_match_all($regex, $str, $matches) === false)
            return false;
        else
            return $matches[$i];
    }

    /**
     * Return capture group $i of the first match of $regex in $str.
     *
     * @param string $regex
     * @param string $str
     * @param int    $i Capture group index (0 = whole match).
     * @return string|false The capture, or false when nothing matched.
     */
    function match($regex, $str, $i = 0)
    {
        if (preg_match($regex, $str, $match) == 1)
            return $match[$i];
        else
            return false;
    }
}
// Optional post-processing: uncomment the next line to build the thumbnails
// automatically once the scrape has finished.
// echo shell_exec("./do_thumbs.sh " . escapeshellarg($path));

// The stray "global $path" that used to stand here had no terminating
// semicolon (a parse error for the whole file) and has no effect at file
// scope, so it was removed.
mysql_close($conn);
?>
<----bash file----->
Create a file called: do_thumbs.sh
chmod +x do_thumbs.sh
I wouldn't recommend uncommenting the mkdir/convert/mv commands until you have verified that the dry run works for you. Adjust as needed. You also need to verify that you have:
imagemagick..
http://www.imagemagick.org/script/index.php
#!/bin/bash
# do_thumbs.sh <movie-dir>
#
# Walks <movie-dir>/temp and, for every file in it, reports the 89x131
# thumbnail that would be created in <movie-dir>/covers/thumbs and the move of
# the original into <movie-dir>/covers.
#
# The mkdir, convert and mv commands are deliberately left commented out, so
# the script is a dry run until they are enabled. convert is part of
# ImageMagick.

if [ "$(whoami)" != root ]; then
    echo "<-> You can only run this as root.."
    exit 1
fi
# Quote "$1" everywhere so a path containing spaces is handled as one word.
if [ -z "$1" ]; then
    echo "missing argument"
    exit 1
fi
if [ ! -d "$1" ]; then
    echo "<-> directory does not exist: $1"
    exit 1
fi

#temporary write directory
path=${1}/temp
#original directory
origpath=${1}/covers
#thumbnail directory
thumbpath=${1}/covers/thumbs

# make sure these directories exist!
if [ ! -d "$path" ]; then
    echo "<-> Creating directory: $path"
    #mkdir -p "$path"
fi
if [ ! -d "$origpath" ]; then
    echo "<-> Creating directory: $origpath"
    #mkdir -p "$origpath"
fi
if [ ! -d "$thumbpath" ]; then
    echo "<-> Creating directory: $thumbpath"
    #mkdir -p "$thumbpath"
fi

i=0
# Iterate with a glob rather than parsing ls output, which would split file
# names on whitespace.
for entry in "$path"/*
do
    # An unmatched glob stays literal; skip it so an empty or missing
    # directory counts zero files.
    [ -e "$entry" ] || continue
    file=${entry##*/}
    echo "<--> Processing: $file 89x131"
    #convert -resize 89x131 "$path/$file" "$thumbpath/$file"
    echo "<----> Created THUMBNAIL: $thumbpath/$file"
    #mv "$path/$file" "$origpath/$file"
    echo "<----> moving ORIGINAL: $path/$file -> $origpath/$file"
    i=$((i+1))
done
echo "<!--> Finished. $i files processed."