From ed47c1557915bb2472f6959e723cd76155312a98 Mon Sep 17 00:00:00 2001 From: Chris Xiong Date: Mon, 6 Apr 2020 00:50:58 +0800 Subject: Add deduper (unfinished tool for finding image duplicates). --- deduper/libpuzzle/php/examples/similar/similar.php | 158 +++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 deduper/libpuzzle/php/examples/similar/similar.php (limited to 'deduper/libpuzzle/php/examples/similar/similar.php') diff --git a/deduper/libpuzzle/php/examples/similar/similar.php b/deduper/libpuzzle/php/examples/similar/similar.php new file mode 100644 index 0000000..4b3ad40 --- /dev/null +++ b/deduper/libpuzzle/php/examples/similar/similar.php @@ -0,0 +1,158 @@ + + + + +

Similar images finder using libpuzzle

+' . "\n"; + echo 'Enter an image URL (http only):' . "\n"; + echo '' . "\n"; + echo ''; + echo '' . "\n"; +} + +function display_error($err) { + echo '
' . htmlspecialchars($err) . '
' . "\n"; +} + +function display_loading() { + echo '
Loading...
' . "\n"; + @ob_flush(); flush(); +} + +function display_loaded() { + echo '
Loaded.
' . "\n"; + @ob_flush(); flush(); +} + +function display_signature_ok() { + echo '
Signature computed.
' . "\n"; + @ob_flush(); flush(); +} + +function remove_tmpfile($file) { + @unlink($file); +} + +function get_client_info() { + return @$_SERVER['REMOTE_ADDR'] . '/' . time(); +} + +function display_similar_pictures($urls) { + echo '
' . "\n"; + foreach ($urls as $url) { + echo ''; + echo ' '; + echo '' . "\n"; + + } + echo '
' . "\n";
}

/**
 * Common failure path for record_url().
 *
 * Closes every still-open stream in $handles, re-displays the form followed
 * by the error message, and returns FALSE so that callers can simply write
 * `return record_url_fail(...)`.
 *
 * @param string $message Error text handed to display_error().
 * @param array  $handles Stream resources to close (may be empty).
 * @return bool Always FALSE.
 */
function record_url_fail($message, $handles = array()) {
    foreach ($handles as $handle) {
        if (is_resource($handle)) {
            fclose($handle);
        }
    }
    display_form();
    display_error($message);

    return FALSE;
}

/**
 * Downloads the image at $url into a temporary file, validates it, and
 * records its signature.
 *
 * Steps:
 *  1. Create a temporary file (removed at shutdown through remove_tmpfile()).
 *  2. Read the first 4096 bytes and let getimagesize() check that the data is
 *     a GIF, JPEG or PNG of at least 50x50 pixels.
 *  3. Copy the remainder, refusing anything larger than MAX_IMAGE_SIZE bytes.
 *  4. Compute the MD5 of the file and its signature with
 *     puzzle_fill_cvec_from_file(), then store both with save_signature().
 *
 * On any failure the form and an error message are displayed.
 *
 * @param string $url   URL to fetch (already validated by the caller).
 * @param mixed  &$md5  Receives the MD5 hex digest of the downloaded file.
 * @param mixed  &$cvec Receives the signature returned by
 *                      puzzle_fill_cvec_from_file().
 * @return bool TRUE on success, FALSE on failure.
 */
function record_url($url, &$md5, &$cvec) {
    if (function_exists('sys_get_temp_dir')) {
        $tmpdir = sys_get_temp_dir();
    } else {
        $tmpdir = '/tmp';
    }
    $dfn = tempnam($tmpdir, 'similar-' . md5(uniqid(mt_rand(), TRUE)));
    if ($dfn === FALSE) {
        return record_url_fail('Unable to create the temporary file');
    }
    register_shutdown_function('remove_tmpfile', $dfn);
    if (($dfp = fopen($dfn, 'w')) === FALSE) {
        return record_url_fail('Unable to create the temporary file');
    }
    if (($fp = fopen($url, 'r')) === FALSE) {
        return record_url_fail('Unable to open: [' . $url . ']', array($dfp));
    }
    $handles = array($fp, $dfp);

    // The first chunk is enough for getimagesize() to identify the image.
    $f = fread($fp, 4096);
    if ($f === FALSE || $f === '') {
        return record_url_fail('Unable to load: [' . $url . ']', $handles);
    }
    $written = strlen($f);
    fwrite($dfp, $f);
    // getimagesize() reopens the file by name, so make sure the header
    // bytes have actually been handed to the OS first.
    fflush($dfp);
    $infos = @getimagesize($dfn);
    if (empty($infos) ||
        ($infos[2] !== IMAGETYPE_GIF && $infos[2] !== IMAGETYPE_JPEG &&
         $infos[2] !== IMAGETYPE_PNG) ||
        $infos[0] < 50 || $infos[1] < 50) {
        return record_url_fail('Unsupported image format', $handles);
    }
    unset($f);

    // Copy the rest of the body. We allow reading ONE byte past the limit:
    // that keeps the fread() length strictly positive and lets the size
    // check below actually detect an oversized file.
    while ($written <= MAX_IMAGE_SIZE && !feof($fp)) {
        $max = MAX_IMAGE_SIZE - $written + 1;
        if ($max > 65536) {
            $max = 65536;
        }
        $t = fread($fp, $max);
        if ($t === FALSE) {
            // A read error would otherwise loop forever, since feof()
            // never becomes true on a broken stream.
            return record_url_fail('Unable to load: [' . $url . ']', $handles);
        }
        fwrite($dfp, $t);
        $written += strlen($t);
    }
    unset($t);
    if ($written > MAX_IMAGE_SIZE) {
        return record_url_fail('File too large', $handles);
    }
    fclose($fp);
    fclose($dfp);
    display_loaded();

    $md5 = @md5_file($dfn);
    if (empty($md5)) {
        return record_url_fail('Unable to get the MD5 of the file');
    }
    $cvec = puzzle_fill_cvec_from_file($dfn);
    if (empty($cvec)) {
        return record_url_fail('Unable to compute image signature');
    }
    display_signature_ok();
    save_signature($url, get_client_info(), $md5, $cvec);

    return TRUE;
}

// ---------------------------------------------------------------------------
// Request handling: validate the submitted URL, fetch and index the image,
// then list the pictures that look similar to it.
// ---------------------------------------------------------------------------

// Only accept a plain string; a missing or array-valued field counts as empty.
$url = (isset($_POST['url']) && is_string($_POST['url'])) ? trim($_POST['url']) : '';
if ($url === '') {
    display_form();
    exit(0);
}
// An ASCII delimiter is used on purpose: a multi-byte delimiter makes PCRE
// reject the pattern when this file is stored as UTF-8.
// NOTE(review): this only checks the shape of the URL. The host may still
// resolve to an internal address and the http wrapper follows redirects by
// default - consider restricting both before exposing this publicly.
if (strlen($url) > MAX_URL_SIZE ||
    preg_match('~^http://([a-z0-9-]+[.])+[a-z]{2,}/.~i', $url) !== 1) {
    display_form();
    display_error('Invalid URL, must be http://...');
    exit(1);
}
display_loading();
$md5 = FALSE;
$cvec = FALSE;
if (record_url($url, $md5, $cvec) !== TRUE) {
    exit(1);
}
$urls = find_similar_pictures($md5, $cvec);
unset($cvec);
display_form();
display_similar_pictures($urls);

?>

-- cgit v1.2.3