aboutsummaryrefslogtreecommitdiff
path: root/deduper/libpuzzle/php/examples
diff options
context:
space:
mode:
Diffstat (limited to 'deduper/libpuzzle/php/examples')
-rw-r--r--deduper/libpuzzle/php/examples/Makefile.am2
-rw-r--r--deduper/libpuzzle/php/examples/similar/Makefile.am6
-rw-r--r--deduper/libpuzzle/php/examples/similar/config.inc.php9
-rw-r--r--deduper/libpuzzle/php/examples/similar/schema.pgsql.sql230
-rw-r--r--deduper/libpuzzle/php/examples/similar/schema.sqlite3.sql23
-rw-r--r--deduper/libpuzzle/php/examples/similar/similar.inc.php120
-rw-r--r--deduper/libpuzzle/php/examples/similar/similar.php158
7 files changed, 548 insertions, 0 deletions
diff --git a/deduper/libpuzzle/php/examples/Makefile.am b/deduper/libpuzzle/php/examples/Makefile.am
new file mode 100644
index 0000000..82c81ba
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = \
+ similar
diff --git a/deduper/libpuzzle/php/examples/similar/Makefile.am b/deduper/libpuzzle/php/examples/similar/Makefile.am
new file mode 100644
index 0000000..126f6df
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = \
+ schema.sqlite3.sql \
+ schema.pgsql.sql \
+ similar.php \
+ similar.inc.php \
+ config.inc.php
diff --git a/deduper/libpuzzle/php/examples/similar/config.inc.php b/deduper/libpuzzle/php/examples/similar/config.inc.php
new file mode 100644
index 0000000..d4e3b41
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/config.inc.php
@@ -0,0 +1,9 @@
+<?php
+
+define('MAX_IMAGE_SIZE', 1024 * 1024 * 4);
+define('MAX_URL_SIZE', 255);
+define('DB_DSN', 'sqlite:similar.sqlite3');
+define('MAX_WORDS', 100);
+define('MAX_WORD_LENGTH', 10);
+
+?>
diff --git a/deduper/libpuzzle/php/examples/similar/schema.pgsql.sql b/deduper/libpuzzle/php/examples/similar/schema.pgsql.sql
new file mode 100644
index 0000000..7dc6bc1
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/schema.pgsql.sql
@@ -0,0 +1,230 @@
+--
+-- PostgreSQL database dump
+--
+
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = off;
+SET check_function_bodies = false;
+SET client_min_messages = warning;
+SET escape_string_warning = off;
+
+SET SESSION AUTHORIZATION 'similar';
+
+--
+-- Name: SCHEMA public; Type: COMMENT; Schema: -; Owner: similar
+--
+
+COMMENT ON SCHEMA public IS 'Standard public schema';
+
+
+SET search_path = public, pg_catalog;
+
+SET default_tablespace = '';
+
+SET default_with_oids = false;
+
+--
+-- Name: pictures; Type: TABLE; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE TABLE pictures (
+ id integer NOT NULL,
+ digest character(32) NOT NULL,
+ CONSTRAINT ck_digest CHECK ((char_length(digest) = 32))
+);
+
+
+--
+-- Name: pictures_id_seq; Type: SEQUENCE; Schema: public; Owner: similar
+--
+
+CREATE SEQUENCE pictures_id_seq
+ START WITH 1
+ INCREMENT BY 1
+ NO MAXVALUE
+ NO MINVALUE
+ CACHE 1;
+
+
+--
+-- Name: pictures_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: similar
+--
+
+ALTER SEQUENCE pictures_id_seq OWNED BY pictures.id;
+
+
+--
+-- Name: sentpictures; Type: TABLE; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE TABLE sentpictures (
+ id integer NOT NULL,
+ url character varying(255) NOT NULL,
+ sender character varying(100) NOT NULL,
+ picture_id integer NOT NULL,
+ CONSTRAINT ck_url CHECK (((url)::text <> ''::text))
+);
+
+
+--
+-- Name: sentpictures_id_seq; Type: SEQUENCE; Schema: public; Owner: similar
+--
+
+CREATE SEQUENCE sentpictures_id_seq
+ START WITH 1
+ INCREMENT BY 1
+ NO MAXVALUE
+ NO MINVALUE
+ CACHE 1;
+
+
+--
+-- Name: sentpictures_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: similar
+--
+
+ALTER SEQUENCE sentpictures_id_seq OWNED BY sentpictures.id;
+
+
+--
+-- Name: signatures; Type: TABLE; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE TABLE signatures (
+ id integer NOT NULL,
+ compressed_signature bytea NOT NULL,
+ picture_id integer NOT NULL,
+ CONSTRAINT ck_signature CHECK ((octet_length(compressed_signature) >= 182))
+);
+
+
+--
+-- Name: signatures_id_seq; Type: SEQUENCE; Schema: public; Owner: similar
+--
+
+CREATE SEQUENCE signatures_id_seq
+ START WITH 1
+ INCREMENT BY 1
+ NO MAXVALUE
+ NO MINVALUE
+ CACHE 1;
+
+
+--
+-- Name: signatures_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: similar
+--
+
+ALTER SEQUENCE signatures_id_seq OWNED BY signatures.id;
+
+
+--
+-- Name: words; Type: TABLE; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE TABLE words (
+ pos_and_word bytea NOT NULL,
+ signature_id integer NOT NULL,
+ CONSTRAINT ck_pos_and_word CHECK ((octet_length(pos_and_word) >= 2))
+);
+
+
+--
+-- Name: id; Type: DEFAULT; Schema: public; Owner: similar
+--
+
+ALTER TABLE pictures ALTER COLUMN id SET DEFAULT nextval('pictures_id_seq'::regclass);
+
+
+--
+-- Name: id; Type: DEFAULT; Schema: public; Owner: similar
+--
+
+ALTER TABLE sentpictures ALTER COLUMN id SET DEFAULT nextval('sentpictures_id_seq'::regclass);
+
+
+--
+-- Name: id; Type: DEFAULT; Schema: public; Owner: similar
+--
+
+ALTER TABLE signatures ALTER COLUMN id SET DEFAULT nextval('signatures_id_seq'::regclass);
+
+
+--
+-- Name: pictures_pkey; Type: CONSTRAINT; Schema: public; Owner: similar; Tablespace:
+--
+
+ALTER TABLE ONLY pictures
+ ADD CONSTRAINT pictures_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: sentpictures_pkey; Type: CONSTRAINT; Schema: public; Owner: similar; Tablespace:
+--
+
+ALTER TABLE ONLY sentpictures
+ ADD CONSTRAINT sentpictures_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: signatures_pkey; Type: CONSTRAINT; Schema: public; Owner: similar; Tablespace:
+--
+
+ALTER TABLE ONLY signatures
+ ADD CONSTRAINT signatures_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: idx_digest; Type: INDEX; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE UNIQUE INDEX idx_digest ON pictures USING btree (digest);
+
+
+--
+-- Name: idx_picture_id; Type: INDEX; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE INDEX idx_picture_id ON sentpictures USING btree (picture_id);
+
+
+--
+-- Name: idx_pos_and_word; Type: INDEX; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE INDEX idx_pos_and_word ON words USING btree (pos_and_word);
+
+
+--
+-- Name: idx_url; Type: INDEX; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE UNIQUE INDEX idx_url ON sentpictures USING btree (url);
+
+
+--
+-- Name: sentpictures_picture_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: similar
+--
+
+ALTER TABLE ONLY sentpictures
+ ADD CONSTRAINT sentpictures_picture_id_fkey FOREIGN KEY (picture_id) REFERENCES pictures(id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+
+--
+-- Name: signatures_picture_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: similar
+--
+
+ALTER TABLE ONLY signatures
+ ADD CONSTRAINT signatures_picture_id_fkey FOREIGN KEY (picture_id) REFERENCES pictures(id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+
+--
+-- Name: words_signature_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: similar
+--
+
+ALTER TABLE ONLY words
+ ADD CONSTRAINT words_signature_id_fkey FOREIGN KEY (signature_id) REFERENCES signatures(id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+
+--
+-- PostgreSQL database dump complete
+--
+
diff --git a/deduper/libpuzzle/php/examples/similar/schema.sqlite3.sql b/deduper/libpuzzle/php/examples/similar/schema.sqlite3.sql
new file mode 100644
index 0000000..dc5a6c3
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/schema.sqlite3.sql
@@ -0,0 +1,23 @@
+CREATE TABLE pictures (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ digest CHAR(32) NOT NULL
+);
+CREATE TABLE sentpictures (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ url VARCHAR(255) NOT NULL,
+ sender VARCHAR(100) NOT NULL,
+ picture_id INTEGER NOT NULL
+);
+CREATE TABLE signatures (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ compressed_signature CHAR(182) NOT NULL,
+ picture_id INTEGER NOT NULL
+);
+CREATE TABLE words (
+ pos_and_word CHAR(5) NOT NULL,
+ signature_id INTEGER NOT NULL
+);
+CREATE UNIQUE INDEX idx_digest ON pictures(digest);
+CREATE INDEX idx_picture_id ON sentpictures (picture_id);
+CREATE INDEX idx_pos_and_word ON words(pos_and_word);
+CREATE UNIQUE INDEX idx_url ON sentpictures (url);
diff --git a/deduper/libpuzzle/php/examples/similar/similar.inc.php b/deduper/libpuzzle/php/examples/similar/similar.inc.php
new file mode 100644
index 0000000..cfc806e
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/similar.inc.php
@@ -0,0 +1,120 @@
+<?php
+
+function split_into_words($sig) {
+ $words = array();
+ $u = 0;
+ do {
+ $words[$u] = substr($sig, $u, MAX_WORD_LENGTH);
+ } while (++$u < MAX_WORDS);
+
+ return $words;
+}
+
+function save_signature($url, $client_info, $md5, $cvec) {
+ $compressed_cvec = puzzle_compress_cvec($cvec);
+ $words = split_into_words($cvec);
+ $dbh = new PDO(DB_DSN);
+ $dbh->beginTransaction();
+ try {
+ $st = $dbh->prepare
+ ('DELETE FROM sentpictures WHERE url = :url');
+ $st->execute(array(':url' => $url));
+ $st = $dbh->prepare
+ ('SELECT id FROM pictures WHERE digest = :digest');
+ $st->execute(array(':digest' => $md5));
+ $picture_id = $st->fetchColumn();
+ $st->closeCursor();
+ $duplicate = TRUE;
+ if ($picture_id === FALSE) {
+ $duplicate = FALSE;
+ $st = $dbh->prepare
+ ('INSERT INTO pictures (digest) VALUES (:digest)');
+ $st->execute(array(':digest' => $md5));
+ $picture_id = $dbh->lastInsertId('id');
+ }
+ $st = $dbh->prepare
+ ('INSERT INTO sentpictures (url, sender, picture_id) ' .
+ 'VALUES (:url, :sender, :picture_id)');
+ $st->execute(array(':url' => $url, ':sender' => $client_info,
+ ':picture_id' => $picture_id));
+ if ($duplicate === TRUE) {
+ $dbh->commit();
+ return TRUE;
+ }
+ $st = $dbh->prepare
+ ('INSERT INTO signatures (compressed_signature, picture_id) ' .
+ 'VALUES(:compressed_signature, :picture_id)');
+ $st->execute(array(':compressed_signature' => $compressed_cvec,
+ ':picture_id' => $picture_id));
+ $signature_id = $dbh->lastInsertId('id');
+ $st = $dbh->prepare
+ ('INSERT INTO words (pos_and_word, signature_id) ' .
+ 'VALUES (:pos_and_word, :signature_id)');
+ foreach ($words as $u => $word) {
+ $st->execute(array('pos_and_word'
+ => chr($u) . puzzle_compress_cvec($word),
+ 'signature_id' => $signature_id));
+ }
+ $dbh->commit();
+ } catch (Exception $e) {
+ var_dump($e);
+ $dbh->rollback();
+ }
+ return TRUE;
+}
+
+function find_similar_pictures($md5, $cvec,
+ $threshold = PUZZLE_CVEC_SIMILARITY_THRESHOLD) {
+ $compressed_cvec = puzzle_compress_cvec($cvec);
+ $words = split_into_words($cvec);
+ $dbh = new PDO(DB_DSN);
+ $dbh->beginTransaction();
+ $sql = 'SELECT DISTINCT(signature_id) AS signature_id FROM words ' .
+ 'WHERE pos_and_word IN (';
+ $coma = FALSE;
+ foreach ($words as $u => $word) {
+ if ($coma === TRUE) {
+ $sql .= ',';
+ }
+ $sql .= $dbh->quote(chr($u) . puzzle_compress_cvec($word));
+ $coma = TRUE;
+ }
+ $sql .= ')';
+ $res_words = $dbh->query($sql);
+ $scores = array();
+ $st = $dbh->prepare('SELECT compressed_signature, picture_id ' .
+ 'FROM signatures WHERE id = :id');
+ while (($signature_id = $res_words->fetchColumn()) !== FALSE) {
+ $st->execute(array(':id' => $signature_id));
+ $row = $st->fetch();
+ $found_compressed_signature = $row['compressed_signature'];
+ $picture_id = $row['picture_id'];
+ $found_cvec = puzzle_uncompress_cvec($found_compressed_signature);
+ $distance = puzzle_vector_normalized_distance($cvec, $found_cvec);
+ if ($distance < $threshold && $distance > 0.0) {
+ $scores[$picture_id] = $distance;
+ }
+ }
+ $sql = 'SELECT url FROM sentpictures WHERE picture_id IN (';
+ $coma = FALSE;
+ foreach ($scores as $picture_id => $score) {
+ if ($coma === TRUE) {
+ $sql .= ',';
+ }
+ $sql .= $dbh->quote($picture_id);
+ $coma = TRUE;
+ }
+ $sql .= ')';
+ $urls = array();
+ if (!empty($scores)) {
+ $res_urls = $dbh->query($sql);
+ while (($url = $res_urls->fetchColumn()) !== FALSE) {
+ array_push($urls, $url);
+ }
+ }
+ $dbh->commit();
+
+ return $urls;
+}
+
+?>
diff --git a/deduper/libpuzzle/php/examples/similar/similar.php b/deduper/libpuzzle/php/examples/similar/similar.php
new file mode 100644
index 0000000..4b3ad40
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/similar.php
@@ -0,0 +1,158 @@
+<html><!-- sample image search engine, part of the libpuzzle package -->
+<head>
+</head>
+<body>
+<h1>Similar images finder using <a href="http://libpuzzle.pureftpd.org">libpuzzle</a></h1>
+<?php
+
+error_reporting(E_ALL);
+
+require_once 'config.inc.php';
+require_once 'similar.inc.php';
+
+function display_form() {
+ echo '<form action="' . htmlspecialchars($_SERVER['REQUEST_URI']) . '" ' .
+ 'method="POST">' . "\n";
+ echo 'Enter an image URL (http only):' . "\n";
+ echo '<input type="text" size="100" value="" autocomplete="off" name="url" />' . "\n";
+ echo '<input type="submit" />';
+ echo '</form>' . "\n";
+}
+
+function display_error($err) {
+ echo '<div id="err"><strong>' . htmlspecialchars($err) . '</strong></div>' . "\n";
+}
+
+function display_loading() {
+ echo '<div id="loading">Loading...</div>' . "\n";
+ @ob_flush(); flush();
+}
+
+function display_loaded() {
+ echo '<div id="loaded">Loaded.</div>' . "\n";
+ @ob_flush(); flush();
+}
+
+function display_signature_ok() {
+ echo '<div id="sig-ok">Signature computed.</div>' . "\n";
+ @ob_flush(); flush();
+}
+
+function remove_tmpfile($file) {
+ @unlink($file);
+}
+
+function get_client_info() {
+ return @$_SERVER['REMOTE_ADDR'] . '/' . time();
+}
+
+function display_similar_pictures($urls) {
+ echo '<div id="images">' . "\n";
+ foreach ($urls as $url) {
+ echo '<a href="' . htmlentities($url) . '" ' .
+ 'onclick="window.open(this.href); return false;">';
+ echo ' <img src="' . htmlentities($url) . '" alt="" />';
+ echo '</a>' . "\n";
+
+ }
+ echo '</div>' . "\n";
+}
+
+function record_url($url, &$md5, &$cvec) {
+ if (function_exists('sys_get_temp_dir')) {
+ $tmpdir = sys_get_temp_dir();
+ } else {
+ $tmpdir = '/tmp';
+ }
+ $dfn = tempnam($tmpdir, 'similar-' . md5(uniqid(mt_rand(), TRUE)));
+ register_shutdown_function('remove_tmpfile', $dfn);
+ if (($dfp = fopen($dfn, 'w')) == FALSE) {
+ display_form();
+ display_error('Unable to create the temporary file');
+ return FALSE;
+ }
+ if (($fp = fopen($url, 'r')) == FALSE) {
+ display_form();
+ display_error('Unable to open: [' . $url . ']');
+ return FALSE;
+ }
+ $f = fread($fp, 4096);
+ $written = strlen($f);
+ if (empty($f)) {
+ display_form();
+ display_error('Unable to load: [' . $url . ']');
+ return FALSE;
+ }
+ fwrite($dfp, $f);
+ $infos = @getimagesize($dfn);
+ if (empty($infos) ||
+ ($infos[2] !== IMAGETYPE_GIF && $infos[2] !== IMAGETYPE_JPEG &&
+ $infos[2] !== IMAGETYPE_PNG) ||
+ $infos[0] < 50 || $infos[1] < 50) {
+ fclose($dfp);
+ display_form();
+ display_error('Unsupported image format');
+ return FALSE;
+ }
+ fseek($dfp, strlen($f));
+ while (!feof($fp)) {
+ $max = MAX_IMAGE_SIZE - $written;
+ if ($max > 65536) {
+ $max = 65536;
+ }
+ $t = fread($fp, $max);
+ fwrite($dfp, $t);
+ $written += strlen($t);
+ if ($written > MAX_IMAGE_SIZE) {
+ fclose($dfp);
+ display_form();
+ display_error('File too large');
+ return FALSE;
+ }
+ }
+ unset($t);
+ fclose($dfp);
+ display_loaded();
+ $md5 = @md5_file($dfn);
+ if (empty($md5)) {
+ display_form();
+ display_error('Unable to get the MD5 of the file');
+ return FALSE;
+ }
+ $cvec = puzzle_fill_cvec_from_file($dfn);
+ if (empty($cvec)) {
+ display_form();
+ display_error('Unable to compute image signature');
+ return FALSE;
+ }
+ display_signature_ok();
+ save_signature($url, get_client_info(), $md5, $cvec);
+
+ return TRUE;
+}
+
+$url = trim(@$_POST['url']);
+if (empty($url)) {
+ display_form();
+ exit(0);
+}
+if (strlen($url) > MAX_URL_SIZE ||
+ preg_match('£^http://([a-z0-9-]+[.])+[a-z]{2,}/.£i', $url) <= 0) {
+ display_form();
+ display_error('Invalid URL, must be http://...');
+ exit(1);
+}
+display_loading();
+$md5 = FALSE;
+$cvec = FALSE;
+if (record_url($url, $md5, $cvec) !== TRUE) {
+ exit(1);
+}
+$urls = find_similar_pictures($md5, $cvec);
+unset($cvec);
+display_form();
+display_similar_pictures($urls);
+
+?>
+</body>
+</html>