aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Virtools/3dxml.py70
-rw-r--r--deduper/CMakeLists.txt20
-rw-r--r--deduper/deduper.cpp195
-rw-r--r--deduper/libpuzzle/AUTHORS1
-rw-r--r--deduper/libpuzzle/COPYING17
-rw-r--r--deduper/libpuzzle/ChangeLog0
-rw-r--r--deduper/libpuzzle/Makefile.am11
-rw-r--r--deduper/libpuzzle/NEWS0
-rw-r--r--deduper/libpuzzle/README202
-rw-r--r--deduper/libpuzzle/README-PHP76
-rw-r--r--deduper/libpuzzle/THANKS6
-rwxr-xr-xdeduper/libpuzzle/autogen.sh17
-rw-r--r--deduper/libpuzzle/composer.json10
-rw-r--r--deduper/libpuzzle/configure.ac70
-rw-r--r--deduper/libpuzzle/man/Makefile.am7
-rw-r--r--deduper/libpuzzle/man/libpuzzle.3296
-rw-r--r--deduper/libpuzzle/man/puzzle-diff.858
-rw-r--r--deduper/libpuzzle/man/puzzle_set.3129
-rw-r--r--deduper/libpuzzle/php/Makefile.am3
-rw-r--r--deduper/libpuzzle/php/examples/Makefile.am2
-rw-r--r--deduper/libpuzzle/php/examples/similar/Makefile.am6
-rw-r--r--deduper/libpuzzle/php/examples/similar/config.inc.php9
-rw-r--r--deduper/libpuzzle/php/examples/similar/schema.pgsql.sql230
-rw-r--r--deduper/libpuzzle/php/examples/similar/schema.sqlite3.sql23
-rw-r--r--deduper/libpuzzle/php/examples/similar/similar.inc.php120
-rw-r--r--deduper/libpuzzle/php/examples/similar/similar.php158
-rw-r--r--deduper/libpuzzle/php/libpuzzle/CREDITS1
-rw-r--r--deduper/libpuzzle/php/libpuzzle/EXPERIMENTAL0
-rw-r--r--deduper/libpuzzle/php/libpuzzle/LICENSE15
-rw-r--r--deduper/libpuzzle/php/libpuzzle/Makefile.am15
-rw-r--r--deduper/libpuzzle/php/libpuzzle/README4
-rw-r--r--deduper/libpuzzle/php/libpuzzle/build/Makefile.am0
-rw-r--r--deduper/libpuzzle/php/libpuzzle/config.m449
-rw-r--r--deduper/libpuzzle/php/libpuzzle/include/Makefile.am0
-rw-r--r--deduper/libpuzzle/php/libpuzzle/libpuzzle.c410
-rw-r--r--deduper/libpuzzle/php/libpuzzle/libpuzzle.php21
-rw-r--r--deduper/libpuzzle/php/libpuzzle/modules/Makefile.am0
-rw-r--r--deduper/libpuzzle/php/libpuzzle/php_libpuzzle.h66
-rw-r--r--deduper/libpuzzle/php/libpuzzle/tests/001.phpt10
-rw-r--r--deduper/libpuzzle/php/libpuzzle/tests/002.phpt15
-rw-r--r--deduper/libpuzzle/php/libpuzzle/tests/003.phpt24
-rw-r--r--deduper/libpuzzle/php/libpuzzle/tests/Makefile.am7
-rw-r--r--deduper/libpuzzle/php/libpuzzle/tests/pics/Makefile.am3
-rw-r--r--deduper/libpuzzle/php/libpuzzle/tests/pics/pic-a-0.jpgbin0 -> 13946 bytes
-rw-r--r--deduper/libpuzzle/php/libpuzzle/tests/pics/pic-a-1.jpgbin0 -> 27407 bytes
-rw-r--r--deduper/libpuzzle/src/CMakeLists.txt21
-rw-r--r--deduper/libpuzzle/src/Makefile.am72
-rw-r--r--deduper/libpuzzle/src/compress.c125
-rw-r--r--deduper/libpuzzle/src/cvec.c202
-rw-r--r--deduper/libpuzzle/src/dvec.c663
-rw-r--r--deduper/libpuzzle/src/globals.h26
-rw-r--r--deduper/libpuzzle/src/pics/Makefile.am8
-rw-r--r--deduper/libpuzzle/src/pics/duck.gifbin0 -> 7196 bytes
-rw-r--r--deduper/libpuzzle/src/pics/luxmarket_tshirt01.jpgbin0 -> 41128 bytes
-rw-r--r--deduper/libpuzzle/src/pics/luxmarket_tshirt01_black.jpgbin0 -> 19800 bytes
-rw-r--r--deduper/libpuzzle/src/pics/luxmarket_tshirt01_sal.jpgbin0 -> 24646 bytes
-rw-r--r--deduper/libpuzzle/src/pics/luxmarket_tshirt01_sheum.jpgbin0 -> 16128 bytes
-rw-r--r--deduper/libpuzzle/src/pics/pic-a-0.jpgbin0 -> 13946 bytes
-rw-r--r--deduper/libpuzzle/src/pics/pic-a-1.jpgbin0 -> 27407 bytes
-rw-r--r--deduper/libpuzzle/src/puzzle-diff.c130
-rw-r--r--deduper/libpuzzle/src/puzzle.c22
-rw-r--r--deduper/libpuzzle/src/puzzle.h122
-rw-r--r--deduper/libpuzzle/src/puzzle_common.h18
-rw-r--r--deduper/libpuzzle/src/puzzle_p.h67
-rw-r--r--deduper/libpuzzle/src/regress_1.c32
-rw-r--r--deduper/libpuzzle/src/regress_2.c72
-rw-r--r--deduper/libpuzzle/src/regress_3.c35
-rw-r--r--deduper/libpuzzle/src/tunables.c84
-rw-r--r--deduper/libpuzzle/src/vector_ops.c95
-rw-r--r--deduper/thread_pool.h127
-rw-r--r--music/it2midi.cpp1105
72 files changed, 5377 insertions, 26 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9a150e2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+deduper/build
diff --git a/Virtools/3dxml.py b/Virtools/3dxml.py
index 149d9c6..a72cf54 100644
--- a/Virtools/3dxml.py
+++ b/Virtools/3dxml.py
@@ -5,7 +5,7 @@ bl_info={
"description":"Import 3D XML 3.0",
"author":"Chris Xiong",
"version":(0,1),
- "blender":(2,79,0),
+ "blender":(2,82,0),
"category":"Import-Export",
"support":"TESTING"
}
@@ -36,7 +36,7 @@ bl_info={
################################################################
import bpy,bmesh,bpy_extras,mathutils
import xml.etree.ElementTree as etree
-import pathlib,zipfile,time,os,tempfile
+import pathlib,zipfile,time,os,tempfile,math
NS="{http://www.3ds.com/xsd/3DXML}"
@@ -56,11 +56,11 @@ def load_textures(tree):
txp=[]
if txd.is_dir():
txp=[i for i in txd.iterdir() if i.stem.lower()==txname.lower()]
- if len(txp)<1 and tex.attrib["href"] is not None:
+ if len(txp)<1 and tex.attrib["href"] is not None and tex.attrib["href"] in texturefiles:
txp=[texturefiles[tex.attrib["href"]]]
- tx=bpy.data.textures.new(txname,'IMAGE')
+ tx=None
try:
- tx.image=bpy.data.images.load(str(txp[0]),True)
+ tx=bpy.data.images.load(str(txp[0]),check_existing=True)
except IndexError:
print(txname)
textures[tex.attrib["id"]]=tx
@@ -69,15 +69,20 @@ def load_materials(tree):
for mat in tree.findall(f".//{NS}GraphicMaterial"):
mname=mat.attrib["name"]
m=bpy.data.materials.new(mname)
- mslot=m.texture_slots.add()
+ m.use_nodes=True
+ for node in m.node_tree.nodes:
+ m.node_tree.nodes.remove(node)
+ bnode=m.node_tree.nodes.new(type="ShaderNodeBsdfPrincipled")
+ inode=m.node_tree.nodes.new(type="ShaderNodeTexImage")
+ mnode=m.node_tree.nodes.new(type="ShaderNodeOutputMaterial")
try:
- mslot.texture=textures[mat.attrib["texture"].split(":")[-1]]
+ inode.image=textures[mat.attrib["texture"].split(":")[-1]]
except KeyError:
pass
- m.diffuse_color=[float(mat.find(f"{NS}Diffuse").attrib[i])for i in ["red","green","blue"]]
- m.diffuse_intensity=float(mat.attrib["diffuseCoef"])
+ m.node_tree.links.new(inode.outputs[0],bnode.inputs[0])
+ m.node_tree.links.new(bnode.outputs[0],mnode.inputs[0])
+ m.diffuse_color=[float(mat.find(f"{NS}Diffuse").attrib[i])for i in ["red","green","blue"]]+[float(mat.attrib["diffuseCoef"])]
m.specular_color=[float(mat.find(f"{NS}Specular").attrib[i])for i in ["red","green","blue"]]
- m.specular_alpha=float(mat.find(f"{NS}Specular").attrib["alpha"])
m.specular_intensity=float(mat.attrib["specularCoef"])
materials[mat.attrib["id"]]=m
@@ -85,6 +90,7 @@ def load_meshes(tree):
for rep in tree.findall(f".//{NS}Representation"):
rid=rep.attrib["id"]
verts=unflatten(rep.find(f".//{NS}Positions").text,float,3,unitfactor)
+ normals=unflatten(rep.find(f".//{NS}Normals").text,float,3)
uvs=unflatten(rep.find(f".//{NS}TextureCoordinates").text,float,2)
faces=[]
facemat=[]
@@ -111,15 +117,20 @@ def load_meshes(tree):
faces.extend(unflatten(face.attrib["fans"],int,4))
facemat.extend([fmat]*len(unflatten(face.attrib["fans"],int,4)))
meshmat[rid]=matslots
- create_mesh(verts,faces,facemat,uvs,rid)
+ create_mesh(verts,faces,facemat,normals,uvs,rid)
def load_objects(tree):
rr=tree.findall(f".//{NS}ReferenceRep[@format='TESSELLATED']")
- i3d=tree.findall(f".//{NS}Instance3D")
+ sr=set()
+ for ref in rr:
+ n=ref.attrib["name"]
+ sr.add(n[:n.rfind('_')])
+ ti3d=tree.findall(f".//{NS}Instance3D")
+ i3d=[i3 for i3 in ti3d if i3.attrib["name"][:i3.attrib["name"].rfind('_')] in sr]
for ref,i3 in zip(rr,i3d):
meshid=ref.attrib["associatedFile"].split(":")[-1]
objname=ref.attrib["name"]
- objname=objname[0:objname.rfind('_')]
+ objname=objname[:objname.rfind('_')]
mat=list(map(float,i3.find(f"./{NS}RelativeMatrix").text.split(' ')))
obj=bpy.data.objects.new(objname,meshes[meshid])
_wmat=mathutils.Matrix()
@@ -129,18 +140,19 @@ def load_objects(tree):
obj.data.materials.append(materials[m])
obj.matrix_world=_wmat
scn=bpy.context.scene
- scn.objects.link(obj)
- scn.objects.active=obj
- obj.select=True
- bpy.ops.object.shade_smooth()
+ scn.collection.objects.link(obj)
+ obj.select_set(True)
-def create_mesh(verts,faces,facemat,uvs,meshidx):
- if len(uvs)>len(verts):
+ bpy.ops.object.shade_smooth()
+
+def create_mesh(verts,faces,facemat,norms,uvs,meshidx):
+ if len(uvs)<len(verts):
uvs.append([uvs[0]]*(len(verts)-len(uvs)))
meshname=f"Mesh_{meshidx}"
mesh=bmesh.new()
- for i in verts:
- mesh.verts.new(i)
+ for vert,norm in zip(verts,norms):
+ v=mesh.verts.new(vert)
+ v.normal=norm
mesh.verts.ensure_lookup_table()
mesh.verts.index_update()
for i,m in zip(faces,facemat):
@@ -155,8 +167,10 @@ def create_mesh(verts,faces,facemat,uvs,meshidx):
for lp in face.loops:
lp[uv].uv=mathutils.Vector(uvs[lp.vert.index])
msh=bpy.data.meshes.new(meshname)
+ if usesmooth:
+ msh.use_auto_smooth=True
+ msh.auto_smooth_angle=smoothangle
mesh.to_mesh(msh)
- msh.use_auto_smooth=True
meshes[meshidx]=msh
mesh.free()
@@ -203,12 +217,16 @@ class ImportDialog(bpy.types.Operator,bpy_extras.io_utils.ImportHelper):
filter_glob=bpy.props.StringProperty(default='*.3dxml',options={'HIDDEN'})
pt=bpy.props.StringProperty(name="Texture path",default=texdir)
- uf=bpy.props.FloatProperty(name="Unit factor",default=unitfactor)
+ uf=bpy.props.FloatProperty(name="Unit factor",default=unitfactor,min=0)
+ us=bpy.props.BoolProperty(name="Use auto smooth instead of the normal data in the 3DXML file",default=False)
+ aa=bpy.props.FloatProperty(name="Auto smooth angle",min=0,max=180,default=90)
def execute(self,context):
- global texdir,unitfactor
+ global texdir,unitfactor,usesmooth,smoothangle
texdir=self.pt
unitfactor=self.uf
+ usesmooth=self.us
+ smoothangle=self.aa/180*math.pi
read(self.filepath)
return {'FINISHED'}
@@ -229,11 +247,11 @@ def menu_func_import_tdxml(self,context):
def register():
bpy.utils.register_class(ImportDialog)
bpy.utils.register_class(_3DXMLImport)
- bpy.types.INFO_MT_file_import.append(menu_func_import_tdxml)
+ bpy.types.TOPBAR_MT_file_import.append(menu_func_import_tdxml)
def unregister():
bpy.utils.unregister_class(_3DXMLImport)
- bpy.utils.unregister_class(ImportDialog)
+ bpy.utils.unrigister_class(ImportDialog)
if __name__=="__main__":
register()
diff --git a/deduper/CMakeLists.txt b/deduper/CMakeLists.txt
new file mode 100644
index 0000000..ac0859d
--- /dev/null
+++ b/deduper/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Build script for the deduper executable and the bundled libpuzzle sources.
+# target_link_directories() below was introduced in CMake 3.13, so that is
+# the oldest release this file can be processed with.
+cmake_minimum_required(VERSION 3.13)
+project(deduper C CXX)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# gdlib is located through pkg-config; fail early if pkg-config itself is missing.
+find_package(PkgConfig REQUIRED)
+pkg_search_module(gdlib REQUIRED gdlib)
+find_package(Threads REQUIRED)
+
+add_subdirectory(libpuzzle/src)
+add_executable(deduper deduper.cpp thread_pool.h)
+target_link_directories(deduper
+ PRIVATE
+ ${gdlib_LIBRARY_DIRS}
+)
+# Threads::Threads carries both the link library and any required compile flags.
+target_link_libraries(deduper
+ puzzle
+ ${gdlib_LIBRARIES}
+ Threads::Threads
+)
diff --git a/deduper/deduper.cpp b/deduper/deduper.cpp
new file mode 100644
index 0000000..8f6e2f4
--- /dev/null
+++ b/deduper/deduper.cpp
@@ -0,0 +1,195 @@
+#include "libpuzzle/src/puzzle.h"
+
+#include <cstdio>
+#include <cstring>
+
+#include <filesystem>
+#include <mutex>
+#include <string>
+#include <system_error>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <getopt.h>
+
+#include "thread_pool.h"
+
+// libpuzzle context handed to every puzzle_* call in this file.
+PuzzleContext pzctx;
+// Progress counter: incremented by each finished job, reset by main between phases.
+int ctr;
+// Non-zero when -r/--recursive was given (written by getopt_long through a flag pointer).
+int recursive;
+// Number passed to the thread_pool constructor; set with -j/--jobs.
+int njobs=1;
+// Image pairs whose normalized distance is below this value are reported; set with -d/--threshold.
+double threshold=0.3;
+// Positional command-line arguments: the paths to scan for images.
+std::vector<std::string> paths;
+
+// Parses the command line into the globals recursive, njobs, threshold and paths.
+// Returns 0 when the program should proceed, 1 after printing the usage text,
+// and 2 when an argument is missing or invalid.
+int parse_arguments(int argc,char **argv)
+{
+    recursive=0;
+    int help=0;
+    bool bad_jobs=false;
+    bool bad_threshold=false;
+    option longopt[]=
+    {
+        {"recursive",no_argument      ,&recursive,1},
+//      {"destdir"  ,required_argument,0         ,'D'},
+        {"jobs"     ,required_argument,0         ,'j'},
+        {"threshold",required_argument,0         ,'d'},
+        {"help"     ,no_argument      ,&help     ,1},
+        {0          ,0                ,0         ,0}
+    };
+    while(1)
+    {
+        int idx=0;
+        int c=getopt_long(argc,argv,"rhj:d:",longopt,&idx);
+        if(c==-1)break;
+        switch(c)
+        {
+            case 0:
+                // Only flag-style long options (--recursive, --help) return 0, and
+                // getopt_long has already stored their value. --jobs and --threshold
+                // have a null flag pointer, so they arrive as 'j' and 'd' below.
+            break;
+            case 'r':
+                recursive=1;
+            break;
+            case 'h':
+                help=1;
+            break;
+            case 'j':
+                // Reject non-numeric input and pool sizes below one thread.
+                if(sscanf(optarg,"%d",&njobs)!=1||njobs<1)bad_jobs=true;
+            break;
+            case 'd':
+                if(sscanf(optarg,"%lf",&threshold)!=1)bad_threshold=true;
+            break;
+        }
+    }
+    for(;optind<argc;++optind)
+        paths.push_back(argv[optind]);
+    if(help||argc<2)
+    {
+        printf(
+        "Usage: %s [OPTION] PATH...\n"
+        "Detect potentially duplicate images in PATHs and optionally perform an action on them.\n\n"
+        " -h, --help Display this help message and exit.\n"
+        " -r, --recursive Recurse into all directories.\n"
+        " -j, --jobs Number of concurrent tasks to run at once.\n"
+        " -d, --threshold Threshold distance below which images will be considered similar.\n"
+        ,argv[0]
+        );
+        return 1;
+    }
+    if(bad_jobs)
+    {
+        puts("Invalid number of jobs.");
+        return 2;
+    }
+    if(bad_threshold||threshold>1||threshold<0)
+    {
+        puts("Invalid threshold value.");
+        return 2;
+    }
+    // A threshold of exactly 0 could never match anything with a strict '<' comparison.
+    if(threshold<1e-6)threshold=1e-6;
+    if(paths.empty())
+    {
+        puts("Missing image path.");
+        return 2;
+    }
+    return 0;
+}
+
+// Returns true when the file at p begins with a PNG, JPEG or GIF magic number.
+// Files that cannot be opened, or that are shorter than the signature being
+// tested, yield false.
+static bool has_image_signature(const std::filesystem::path &p)
+{
+    FILE* fp=fopen(p.string().c_str(),"rb");
+    if(!fp)return false;
+    char c[8]={0};
+    size_t n=fread((void*)c,1,6,fp);
+    fclose(fp);
+    // Only compare as many bytes as were actually read.
+    if(n>=3&&!memcmp(c,"\xff\xd8\xff",3))return true;
+    if(n<6)return false;
+    return !memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"GIF87a",6)||!memcmp(c,"GIF89a",6);
+}
+
+// Appends to out the path of every PNG/JPEG/GIF file found under path.
+//   path      - a directory to scan, or a single file to test directly
+//   recursive - when true, descend into sub-directories as well
+//   out       - receives the matching file names; existing entries are kept
+// Detection is by file content (magic number), not by file extension.
+void build_file_list(std::filesystem::path path,bool recursive,std::vector<std::string>&out)
+{
+    std::error_code ec;
+    // A PATH naming a plain file is tested directly; handing it to a
+    // directory iterator would throw std::filesystem::filesystem_error.
+    if(std::filesystem::is_regular_file(path,ec))
+    {
+        if(has_image_signature(path))out.push_back(path.string());
+        return;
+    }
+    // Shared loop body for both iterator types.
+    auto scan=[&out](auto dirit)
+    {
+        for(const auto &entry:dirit)
+        {
+            std::error_code entry_ec;
+            // Directories and special files cannot be images; skip them
+            // instead of trying to read a header from them.
+            if(!entry.is_regular_file(entry_ec))continue;
+            if(has_image_signature(entry.path()))
+                out.push_back(entry.path().string());
+        }
+    };
+    if(recursive)
+        scan(std::filesystem::recursive_directory_iterator(path));
+    else
+        scan(std::filesystem::directory_iterator(path));
+}
+
+// Computes the libpuzzle signature of every file, using a pool of njobs threads.
+//   files  - image file names
+//   output - must already hold at least files.size() elements; output[i]
+//            is initialised here and filled with the signature of files[i]
+// Progress is printed to stdout, per-file tracing to stderr.
+void compute_signature_vectors(const std::vector<std::string>&files,std::vector<PuzzleCvec>&output)
+{
+    // Guards ctr and the progress line, which every job touches.
+    // Declared before the pool so it outlives it.
+    std::mutex progress_mtx;
+    thread_pool tp(njobs);
+    for(size_t i=0;i<files.size();++i)
+    {
+        puzzle_init_cvec(&pzctx,&output[i]);
+        auto job_func=[&](int thid,size_t id){
+            fprintf(stderr,"spawned: on thread#%d, file#%zu\n",thid,id);
+            puzzle_fill_cvec_from_file(&pzctx,&output[id],files[id].c_str());
+            fprintf(stderr,"done: file#%zu\n",id);
+            std::lock_guard<std::mutex> lk(progress_mtx);
+            printf("%d/%zu\r",++ctr,files.size());
+            fflush(stdout);
+        };
+        tp.create_task(job_func,i);
+    }
+    // The jobs reference locals of this function, so block until all are done.
+    tp.wait();
+}
+
+// Compares every unordered pair of signatures, using a pool of njobs threads.
+//   vec - signatures; entries with sizeof_vec == 0 are skipped
+//   out - receives (index a, index b, distance) for each pair whose
+//         normalized distance is below the global threshold; the order of
+//         the appended entries depends on job scheduling
+// Progress is printed to stdout, per-pair tracing to stderr.
+void compare_signature_vectors(const std::vector<PuzzleCvec>&vec,std::vector<std::tuple<size_t,size_t,double>>&out)
+{
+    // Guards out, ctr and the progress line, all of which are written by
+    // every job. Declared before the pool so it outlives it.
+    std::mutex result_mtx;
+    const size_t total=vec.size()*(vec.size()-1)/2;
+    thread_pool tp(njobs);
+    for(size_t i=0;i<vec.size();++i)
+    for(size_t j=i+1;j<vec.size();++j)
+    {
+        auto job_func=[&](int thid,size_t ida,size_t idb){
+            fprintf(stderr,"spawned: on thread#%d, file#%zu<->file#%zu\n",thid,ida,idb);
+            bool similar=false;
+            double d=0;
+            if(vec[ida].sizeof_vec&&vec[idb].sizeof_vec)
+            {
+                // The distance computation runs outside the lock.
+                d=puzzle_vector_normalized_distance(&pzctx,&vec[ida],&vec[idb],1);
+                similar=d<threshold;
+                fprintf(stderr,"done:file#%zu<->file#%zu: %lf\n",ida,idb,d);
+            }
+            std::lock_guard<std::mutex> lk(result_mtx);
+            if(similar)out.emplace_back(ida,idb,d);
+            printf("%d/%zu\r",++ctr,total);
+            fflush(stdout);
+        };
+        tp.create_task(job_func,i,j);
+    }
+    // The jobs reference locals of this function, so block until all are done.
+    tp.wait();
+}
+
+// Entry point: gathers image files from the given paths, computes a signature
+// for each, compares all pairs and prints those closer than the threshold.
+int main(int argc,char** argv)
+{
+    // parse_arguments returns 1 after printing usage and 2 on invalid input;
+    // these become exit codes 0 and 1.
+    if(int pr=parse_arguments(argc,argv))return pr-1;
+    puts("building list of files to compare...");
+    std::vector<std::string> x;
+    for(auto&p:paths)
+        build_file_list(p,recursive,x);
+    printf("%zu files to compare.\n",x.size());
+    puts("computing signature vectors...");
+    puzzle_init_context(&pzctx);
+    // One slot per file; compute_signature_vectors writes into them by index.
+    std::vector<PuzzleCvec> cvecs;
+    cvecs.resize(x.size());
+    compute_signature_vectors(x,cvecs);
+    // Dump the raw signatures to stderr for debugging.
+    for(auto &v:cvecs)
+    {
+        fprintf(stderr,"%zu:",v.sizeof_vec);
+        for(size_t i=0;i<v.sizeof_vec;++i)
+            fprintf(stderr," %d",v.vec[i]);
+        fprintf(stderr,"\n");
+    }
+    // The progress counter is shared by both phases.
+    ctr=0;
+    puts("\ncomparing signature vectors...");
+    std::vector<std::tuple<size_t,size_t,double>> r;
+    compare_signature_vectors(cvecs,r);
+    puts("");
+    for(auto &t:r)
+        printf("%s<->%s: %lf\n",x[std::get<0>(t)].c_str(),x[std::get<1>(t)].c_str(),std::get<2>(t));
+    printf("%zu similar images.\n",r.size());
+    for(auto &v:cvecs)puzzle_free_cvec(&pzctx,&v);
+    cvecs.clear();
+    puzzle_free_context(&pzctx);
+    return 0;
+}
diff --git a/deduper/libpuzzle/AUTHORS b/deduper/libpuzzle/AUTHORS
new file mode 100644
index 0000000..bb6ecb3
--- /dev/null
+++ b/deduper/libpuzzle/AUTHORS
@@ -0,0 +1 @@
+Frank DENIS <j at pureftpd.org>
diff --git a/deduper/libpuzzle/COPYING b/deduper/libpuzzle/COPYING
new file mode 100644
index 0000000..30877ad
--- /dev/null
+++ b/deduper/libpuzzle/COPYING
@@ -0,0 +1,17 @@
+/*
+ * ISC License
+ *
+ * Copyright (c) 2007-2015 Frank DENIS <j at pureftpd.org>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
diff --git a/deduper/libpuzzle/ChangeLog b/deduper/libpuzzle/ChangeLog
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/deduper/libpuzzle/ChangeLog
diff --git a/deduper/libpuzzle/Makefile.am b/deduper/libpuzzle/Makefile.am
new file mode 100644
index 0000000..fce7f7b
--- /dev/null
+++ b/deduper/libpuzzle/Makefile.am
@@ -0,0 +1,11 @@
+AUTOMAKE_OPTIONS = gnu
+
+EXTRA_DIST = \
+ autogen.sh \
+ THANKS \
+ README-PHP
+
+SUBDIRS = \
+ src \
+ man \
+ php
diff --git a/deduper/libpuzzle/NEWS b/deduper/libpuzzle/NEWS
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/deduper/libpuzzle/NEWS
diff --git a/deduper/libpuzzle/README b/deduper/libpuzzle/README
new file mode 100644
index 0000000..502a1c0
--- /dev/null
+++ b/deduper/libpuzzle/README
@@ -0,0 +1,202 @@
+
+ .:. LIBPUZZLE .:.
+
+ http://libpuzzle.pureftpd.org
+
+
+ ------------------------ BLURB ------------------------
+
+
+The Puzzle library is designed to quickly find visually similar images (gif,
+png, jpg), even if they have been resized, recompressed, recolored or slightly
+modified.
+
+The library is free, lightweight yet very fast, configurable, easy to use and
+it has been designed with security in mind. This is a C library, but it also
+comes with a command-line tool and PHP bindings.
+
+
+ ------------------------ REFERENCE ------------------------
+
+
+The Puzzle library is an implementation of "An image signature for any kind of
+image", by H. CHI WONG, Marschall BERN and David GOLDBERG.
+
+
+ ------------------------ COMPILATION ------------------------
+
+
+In order to load images, the library relies on the GD2 library.
+You need to install gdlib2 and its development headers before compiling
+libpuzzle.
+The GD2 library is available as a pre-built package for most operating systems.
+Debian and Ubuntu users should install the "libgd2-dev" or the "libgd2-xpm-dev"
+package.
+Gentoo users should install "media-libs/gd".
+OpenBSD, NetBSD and DragonflyBSD users should install the "gd" package.
+MacPorts users should install the "gd2" package.
+X11 support is not required for the Puzzle library.
+
+Once GD2 has been installed, configure the Puzzle library as usual:
+
+./configure
+
+This is a standard autoconf script; if you're not familiar with it, please
+have a look at the INSTALL file.
+
+Compile the beast:
+
+make
+
+Try the built-in tests:
+
+make check
+
+If everything looks fine, install the software:
+
+make install
+
+If anything goes wrong, please submit a bug report to:
+ libpuzzle [at] pureftpd [dot] org
+
+
+ ------------------------ USAGE ------------------------
+
+
+The API is documented in the libpuzzle(3) and puzzle_set(3) man pages.
+You can also play with the puzzle-diff test application.
+See puzzle-diff(8) for more info about the puzzle-diff application.
+
+In order to be thread-safe, every exported function of the library requires a
+PuzzleContext object. That object stores various run-time tunables.
+
+Out of a bitmap picture, the Puzzle library can fill a PuzzleCvec object:
+
+ PuzzleContext context;
+ PuzzleCvec cvec;
+
+ puzzle_init_context(&context);
+ puzzle_init_cvec(&context, &cvec);
+ puzzle_fill_cvec_from_file(&context, &cvec, "directory/filename.jpg");
+
+The PuzzleCvec structure holds two fields:
+ signed char *vec: a pointer to the first element of the vector
+ size_t sizeof_vec: the number of elements
+
+The size depends on the "lambdas" value (see puzzle_set(3)).
+
+PuzzleCvec structures can be compared:
+
+ d = puzzle_vector_normalized_distance(&context, &cvec1, &cvec2, 1);
+
+d is the normalized distance between both vectors. If d is below 0.6, pictures
+are probably similar.
+
+If you need further help, feel free to subscribe to the mailing-list (see
+below).
+
+
+ ------------------------ INDEXING ------------------------
+
+
+How can similar pictures be found quickly when there are millions of records?
+
+The original paper has a simple, yet efficient answer.
+
+Cut the vector in fixed-length words. For instance, let's consider the
+following vector:
+
+[ a b c d e f g h i j k l m n o p q r s t u v w x y z ]
+
+With a word length (K) of 10, you can get the following words:
+
+[ a b c d e f g h i j ] found at position 0
+[ b c d e f g h i j k ] found at position 1
+[ c d e f g h i j k l ] found at position 2
+etc. until position N-1
+
+Then, index your vector with a compound index of (word + position).
+
+Even with millions of images, K = 10 and N = 100 should be enough to have very
+few entries sharing the same index.
+
+Here's a very basic sample database schema:
+
++-----------------------------+
+| signatures |
++-----------------------------+
+| sig_id | signature | pic_id |
++--------+-----------+--------+
+
++--------------------------+
+| words |
++--------------------------+
+| pos_and_word | fk_sig_id |
++--------------+-----------+
+
+I'd recommend splitting at least the "words" table into multiple tables and/or
+servers.
+
+By default (lambdas=9) signatures are 544 bytes long. In order to save storage
+space, they can be compressed to one third of their original size through the
+puzzle_compress_cvec() function. Before use, they must be uncompressed with
+puzzle_uncompress_cvec().
+
+
+ ------------------------ PUZZLE-DIFF ------------------------
+
+
+A command-line tool is also available for scripting or testing.
+
+It is installed as "puzzle-diff" and comes with a man page.
+
+Sample usage:
+
+- Output distance between two images:
+
+$ puzzle-diff pic-a-0.jpg pic-a-1.jpg
+0.102286
+
+- Compare two images, exit with 10 if they look the same, exit with 20 if
+they don't (may be useful for scripts):
+
+$ puzzle-diff -e pic-a-0.jpg pic-a-1.jpg
+$ echo $?
+10
+
+- Compute distance, without cropping and with computing the average intensity
+of the whole blocks:
+
+$ puzzle-diff -p 1.0 -c pic-a-0.jpg pic-a-1.jpg
+0.0523151
+
+
+ ------------------------ COMPARING IMAGES WITH PHP ------------------------
+
+
+A PHP extension is bundled with the Libpuzzle package, and it provides PHP
+bindings to most functions of the library.
+
+Documentation for the Libpuzzle PHP extension is available in the README-PHP
+file.
+
+
+ ------------------------ APPS USING LIBPUZZLE ------------------------
+
+
+Here are third-party projects using libpuzzle:
+
+* ftwin - http://jok.is-a-geek.net/ftwin.php
+ ftwin is a tool useful to find duplicate files according to their content on
+your file system.
+
+* Python bindings for libpuzzle: PyPuzzle
+ https://github.com/ArchangelSDY/PyPuzzle
+
+
+ ------------------------ STATUS ------------------------
+
+
+This project is unfortunately not maintained any more. Pull requests are
+always welcome, but I don't use this library any more and I don't have enough
+spare time to actively work on it.
diff --git a/deduper/libpuzzle/README-PHP b/deduper/libpuzzle/README-PHP
new file mode 100644
index 0000000..6b14fb9
--- /dev/null
+++ b/deduper/libpuzzle/README-PHP
@@ -0,0 +1,76 @@
+
+ .:. LIBPUZZLE - PHP EXTENSION .:.
+
+ http://libpuzzle.pureftpd.org
+
+
+ ------------------------ PHP EXTENSION ------------------------
+
+
+The Puzzle library can also be used through PHP, using a native extension.
+
+Prerequisites are the PHP headers, libtool, autoconf and automake.
+
+Here are the basic steps in order to install the extension:
+
+(on OpenBSD: export AUTOMAKE_VERSION=1.9 ; export AUTOCONF_VERSION=2.61)
+
+cd php/libpuzzle
+phpize
+./configure --with-libpuzzle
+make clean
+make
+make install
+
+If libpuzzle is installed in a non-standard location, use:
+./configure --with-libpuzzle=/base/directory/for/libpuzzle
+
+Then edit your php.ini file and add:
+
+extension=libpuzzle.so
+
+
+ ------------------------ USAGE ------------------------
+
+
+The PHP extension provides bindings for the following tuning functions:
+- puzzle_set_max_width()
+- puzzle_set_max_height()
+- puzzle_set_lambdas()
+- puzzle_set_noise_cutoff()
+- puzzle_set_p_ratio()
+- puzzle_set_contrast_barrier_for_cropping()
+- puzzle_set_max_cropping_ratio()
+- puzzle_set_autocrop()
+
+Have a look at the puzzle_set man page for more info about those.
+
+Getting the signature of a picture is as simple as:
+
+$signature = puzzle_fill_cvec_from_file($filename);
+
+In order to compute the similarity between two pictures using their
+signatures, use:
+
+$d = puzzle_vector_normalized_distance($signature1, $signature2);
+
+The result is between 0.0 and 1.0, with 0.6 being a good threshold to detect
+visually similar pictures.
+
+The PUZZLE_CVEC_SIMILARITY_THRESHOLD, PUZZLE_CVEC_SIMILARITY_HIGH_THRESHOLD,
+PUZZLE_CVEC_SIMILARITY_LOW_THRESHOLD and PUZZLE_CVEC_SIMILARITY_LOWER_THRESHOLD
+constants can also be used to get common thresholds :
+
+if ($d < PUZZLE_CVEC_SIMILARITY_THRESHOLD) {
+ echo "Pictures look similar\n";
+}
+
+Before storing a signature into a database, you can compress it in order to
+save some storage space:
+
+$compressed_signature = puzzle_compress_cvec($signature);
+
+Before use, those compressed signatures must be uncompressed with:
+
+$signature = puzzle_uncompress_cvec($compressed_signature);
+
diff --git a/deduper/libpuzzle/THANKS b/deduper/libpuzzle/THANKS
new file mode 100644
index 0000000..86ef2e1
--- /dev/null
+++ b/deduper/libpuzzle/THANKS
@@ -0,0 +1,6 @@
+Xerox Research Center
+H. CHI WONG
+Marschall BERN
+David GOLDBERG
+Sameh CHAFIK
+Gregory MAXWELL
diff --git a/deduper/libpuzzle/autogen.sh b/deduper/libpuzzle/autogen.sh
new file mode 100755
index 0000000..4717fc4
--- /dev/null
+++ b/deduper/libpuzzle/autogen.sh
@@ -0,0 +1,17 @@
+#! /bin/sh
+
+# Regenerates the autotools build files for libpuzzle.
+
+# Preferred path: if an executable autoreconf is found, hand over to it.
+# exec replaces this shell, so nothing below runs in that case.
+if [ -x "`which autoreconf 2>/dev/null`" ] ; then
+ exec autoreconf -ivf
+fi
+
+# Fallback: pick whichever libtoolize command is present, trying the
+# "glibtoolize" name first.
+if glibtoolize --version > /dev/null 2>&1; then
+ LIBTOOLIZE='glibtoolize'
+else
+ LIBTOOLIZE='libtoolize'
+fi
+
+# Run the individual tools in sequence; the && chain stops at the first failure.
+$LIBTOOLIZE && \
+aclocal && \
+autoheader && \
+automake --add-missing --force-missing --include-deps && \
+autoconf
diff --git a/deduper/libpuzzle/composer.json b/deduper/libpuzzle/composer.json
new file mode 100644
index 0000000..4cd00e2
--- /dev/null
+++ b/deduper/libpuzzle/composer.json
@@ -0,0 +1,10 @@
+{
+ "name": "jedisct1/libpuzzle",
+ "description": "A library to quickly find visually similar images.",
+ "version": "0.10.0",
+ "license": "MIT",
+ "type": "library",
+ "require": {
+ "php": "5.*"
+ }
+}
diff --git a/deduper/libpuzzle/configure.ac b/deduper/libpuzzle/configure.ac
new file mode 100644
index 0000000..1abf0f6
--- /dev/null
+++ b/deduper/libpuzzle/configure.ac
@@ -0,0 +1,70 @@
+# -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.61)
+AC_INIT(libpuzzle, 0.11, bugs@pureftpd.org)
+AC_CONFIG_SRCDIR([src/puzzle.h])
+AC_CONFIG_HEADER([config.h])
+AM_INIT_AUTOMAKE([1.9 dist-bzip2])
+AM_MAINTAINER_MODE
+
+# Checks for programs.
+AC_PROG_CXX
+AC_PROG_CC
+AC_PROG_CPP
+AC_PROG_INSTALL
+AC_PROG_LN_S
+AC_PROG_MAKE_SET
+AC_PATH_PROG(GDLIBCONFIG, [gdlib-config])
+CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE=1"
+CPPFLAGS="$CPPFLAGS `$GDLIBCONFIG --cflags`"
+LDFLAGS="$LDFLAGS `$GDLIBCONFIG --ldflags`"
+LDADD="$LDADD `$GDLIBCONFIG --libs`"
+
+# Checks for libraries.
+
+AC_CHECK_LIB([gd], [gdImageCreateFromGd2],,
+ AC_ERROR([libgd2 development files not found]))
+
+# Checks for header files.
+AC_HEADER_STDC
+AM_PROG_LIBTOOL
+AC_CHECK_HEADERS([limits.h memory.h stddef.h stdlib.h string.h unistd.h])
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+AC_TYPE_SIZE_T
+AC_TYPE_SSIZE_T
+AC_TYPE_OFF_T
+
+# Checks for library functions.
+AC_FUNC_MALLOC
+AC_FUNC_REALLOC
+AC_FUNC_MEMCMP
+AC_CHECK_FUNC([floor], ,[AC_CHECK_LIB([math], [floor])])
+AC_CHECK_FUNC([round], ,[AC_CHECK_LIB([math], [round])])
+AC_CHECK_FUNCS([strtoul])
+
+AC_SUBST([MAINT])
+
+AC_CONFIG_FILES([Makefile
+ man/Makefile
+ src/Makefile
+ src/pics/Makefile
+ php/Makefile
+ php/libpuzzle/Makefile
+ php/libpuzzle/include/Makefile
+ php/libpuzzle/modules/Makefile
+ php/libpuzzle/build/Makefile
+ php/libpuzzle/tests/Makefile
+ php/libpuzzle/tests/pics/Makefile
+ php/examples/Makefile
+ php/examples/similar/Makefile
+ ])
+AC_OUTPUT
+
+AC_MSG_NOTICE([+-------------------------------------------------------+])
+AC_MSG_NOTICE([| You can subscribe to the Libpuzzle users mailing-list |])
+AC_MSG_NOTICE([| to ask for help and to stay informed of new releases. |])
+AC_MSG_NOTICE([| Go to http://libpuzzle.pureftpd.org/ml/ now! |])
+AC_MSG_NOTICE([+-------------------------------------------------------+])
diff --git a/deduper/libpuzzle/man/Makefile.am b/deduper/libpuzzle/man/Makefile.am
new file mode 100644
index 0000000..a3a78a5
--- /dev/null
+++ b/deduper/libpuzzle/man/Makefile.am
@@ -0,0 +1,7 @@
+man_MANS = \
+ libpuzzle.3 \
+ puzzle_set.3 \
+ puzzle-diff.8
+
+EXTRA_DIST = \
+ $(man_MANS)
diff --git a/deduper/libpuzzle/man/libpuzzle.3 b/deduper/libpuzzle/man/libpuzzle.3
new file mode 100644
index 0000000..98cfcbb
--- /dev/null
+++ b/deduper/libpuzzle/man/libpuzzle.3
@@ -0,0 +1,296 @@
+.\"
+.\" Copyright (c) 2007-2014 Frank DENIS <j at pureftpd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: March 31 2011 $
+.Dt LIBPUZZLE 3
+.Sh NAME
+.Nm puzzle_init_cvec ,
+.Nm puzzle_init_dvec ,
+.Nm puzzle_fill_dvec_from_file ,
+.Nm puzzle_fill_cvec_from_file ,
+.Nm puzzle_fill_dvec_from_mem ,
+.Nm puzzle_fill_cvec_from_mem ,
+.Nm puzzle_fill_cvec_from_dvec ,
+.Nm puzzle_free_cvec ,
+.Nm puzzle_free_dvec ,
+.Nm puzzle_init_compressed_cvec ,
+.Nm puzzle_free_compressed_cvec ,
+.Nm puzzle_compress_cvec ,
+.Nm puzzle_uncompress_cvec ,
+.Nm puzzle_vector_normalized_distance
+.Nd compute comparable signatures of bitmap images.
+.Sh SYNOPSIS
+.Fd #include <puzzle.h>
+.Ft void
+.Fn puzzle_init_context "PuzzleContext *context"
+.Ft void
+.Fn puzzle_free_context "PuzzleContext *context"
+.Ft void
+.Fn puzzle_init_cvec "PuzzleContext *context" "PuzzleCvec *cvec"
+.Ft void
+.Fn puzzle_init_dvec "PuzzleContext *context" "PuzzleDvec *dvec"
+.Ft int
+.Fn puzzle_fill_dvec_from_file "PuzzleContext *context" "PuzzleDvec * dvec" "const char *file"
+.Ft int
+.Fn puzzle_fill_cvec_from_file "PuzzleContext *context" "PuzzleCvec * cvec" "const char *file"
+.Ft int
+.Fn puzzle_fill_dvec_from_mem "PuzzleContext *context" "PuzzleDvec * dvec" "const void *mem" "size_t size"
+.Ft int
+.Fn puzzle_fill_cvec_from_mem "PuzzleContext *context" "PuzzleCvec * cvec" "const void *mem" "size_t size"
+.Ft int
+.Fn puzzle_fill_cvec_from_dvec "PuzzleContext *context" "PuzzleCvec * cvec" "const PuzzleDvec *dvec"
+.Ft void
+.Fn puzzle_free_cvec "PuzzleContext *context" "PuzzleCvec *cvec"
+.Ft void
+.Fn puzzle_free_dvec "PuzzleContext *context" "PuzzleDvec *dvec"
+.Ft void
+.Fn puzzle_init_compressed_cvec "PuzzleContext *context" "PuzzleCompressedCvec * compressed_cvec"
+.Ft void
+.Fn puzzle_free_compressed_cvec "PuzzleContext *context" "PuzzleCompressedCvec * compressed_cvec"
+.Ft int
+.Fn puzzle_compress_cvec "PuzzleContext *context" "PuzzleCompressedCvec * compressed_cvec" "const PuzzleCvec * cvec"
+.Ft int
+.Fn puzzle_uncompress_cvec "PuzzleContext *context" "PuzzleCompressedCvec * compressed_cvec" "PuzzleCvec * const cvec"
+.Ft double
+.Fn puzzle_vector_normalized_distance "PuzzleContext *context" "const PuzzleCvec * cvec1" "const PuzzleCvec * cvec2" "int fix_for_texts"
+.Sh DESCRIPTION
+The Puzzle library computes a signature out of a bitmap picture.
+Signatures are comparable and similar pictures have similar signatures.
+.Pp
+After a picture has been loaded and uncompressed, featureless parts of
+the image are skipped (autocrop), unless that step has been explicitly
+disabled, see
+.Xr puzzle_set 3
+.Sh LIBPUZZLE CONTEXT
+Every public function requires a
+.Va PuzzleContext
+object, which stores all required tunables.
+.Pp
+Any application using libpuzzle should initialize a
+.Va PuzzleContext
+object with
+.Fn puzzle_init_context
+and free it after use with
+.Fn puzzle_free_context
+.Bd \-literal \-offset indent
+PuzzleContext context;
+
+puzzle_init_context(&context);
+ ...
+puzzle_free_context(&context);
+.Ed
+.Sh DVEC AND CVEC VECTORS
+The next step is to divide the cropped image into a grid and to compute
+the average intensity of soft\(hyedged pixels in every block. The result is a
+.Va PuzzleDvec
+object.
+.Pp
+.Va PuzzleDvec
+objects should be initialized before use, with
+.Fn puzzle_init_dvec
+and freed after use with
+.Fn puzzle_free_dvec
+.Pp
+The
+.Va PuzzleDvec
+structure has two important fields:
+.Va vec
+is the pointer to the first element of the array containing the average
+intensities, and
+.Va sizeof_compressed_vec
+is the number of elements.
+.Pp
+.Va PuzzleDvec
+objects are not comparable, so what you usually want is to transform these
+objects into
+.Va PuzzleCvec
+objects.
+.Pp
+A
+.Va PuzzleCvec
+object is a vector with relationships between adjacent blocks from a
+.Va PuzzleDvec
+object.
+.Pp
+The
+.Fn puzzle_fill_cvec_from_dvec
+fills a
+.Va PuzzleCvec
+object from a
+.Va PuzzleDvec
+object.
+.Pp
+But just like the other structure,
+.Va PuzzleCvec
+objects must be initialized and freed with
+.Fn puzzle_init_cvec
+and
+.Fn puzzle_free_cvec
+.Pp
+.Va PuzzleCvec
+objects have a vector whose first element is in the
+.Va vec
+field, and the number of elements is in the
+.Va sizeof_vec
+field.
+.Sh LOADING PICTURES
+.Va PuzzleDvec
+and
+.Va PuzzleCvec
+objects can be computed from a bitmap picture file, with
+.Fn puzzle_fill_dvec_from_file
+and
+.Fn puzzle_fill_cvec_from_file
+.Pp
+.Em GIF
+,
+.Em PNG
+and
+.Em JPEG
+file formats are currently supported and automatically recognized.
+.Pp
+Here's a simple example that creates a
+.Va PuzzleCvec
+object out of a file.
+.Bd \-literal \-offset indent
+PuzzleContext context;
+PuzzleCvec cvec;
+
+puzzle_init_context(&context);
+puzzle_init_cvec(&context, &cvec);
+puzzle_fill_cvec_from_file(&context, &cvec, "test\-picture.jpg");
+ ...
+puzzle_free_cvec(&context, &cvec);
+puzzle_free_context(&context);
+.Ed
+.Sh COMPARING VECTORS
+In order to check whether two pictures are similar, you need to compare their
+.Va PuzzleCvec
+signatures, using
+.Fn puzzle_vector_normalized_distance
+.Pp
+That function returns a distance between 0.0 and 1.0. The smaller the distance, the more similar the pictures.
+.Pp
+Tests on common pictures show that a normalized distance of 0.6 (also defined as
+.Va PUZZLE_CVEC_SIMILARITY_THRESHOLD
+) means that both pictures are visually similar.
+.Pp
+If that threshold is not right for your set of pictures, you can experiment
+with
+.Va PUZZLE_CVEC_SIMILARITY_HIGH_THRESHOLD
+,
+.Va PUZZLE_CVEC_SIMILARITY_LOW_THRESHOLD
+and
+.Va PUZZLE_CVEC_SIMILARITY_LOWER_THRESHOLD
+or with your own value.
+.Pp
+If the
+.Fa fix_for_texts
+argument of
+.Fn puzzle_vector_normalized_distance
+is
+.Em 1
+, a fix is applied to the computation in order to deal with bitmap pictures
+that contain text. That fix is recommended, as it allows using the same
+threshold for that kind of picture as for generic pictures.
+.Pp
+If
+.Fa fix_for_texts
+is
+.Em 0
+, that special way of computing the normalized distance is disabled.
+.Bd \-literal \-offset indent
+PuzzleContext context;
+PuzzleCvec cvec1, cvec2;
+double d;
+
+puzzle_init_context(&context);
+puzzle_init_cvec(&context, &cvec1);
+puzzle_init_cvec(&context, &cvec2);
+puzzle_fill_cvec_from_file(&context, &cvec1, "test\-picture\-1.jpg");
+puzzle_fill_cvec_from_file(&context, &cvec2, "test\-picture\-2.jpg");
+d = puzzle_vector_normalized_distance(&context, &cvec1, &cvec2, 1);
+if (d < PUZZLE_CVEC_SIMILARITY_THRESHOLD) {
+ puts("Pictures are similar");
+}
+puzzle_free_cvec(&context, &cvec2);
+puzzle_free_cvec(&context, &cvec1);
+puzzle_free_context(&context);
+.Ed
+.Sh CVEC COMPRESSION
+In order to reduce storage needs,
+.Va PuzzleCvec
+objects can be compressed to 1/3 of their original size.
+.Pp
+.Va PuzzleCompressedCvec
+structures hold the compressed data. Before and after use, these structures
+have to be passed to
+.Fn puzzle_init_compressed_cvec
+and
+.Fn puzzle_free_compressed_cvec
+.Pp
+.Fn puzzle_compress_cvec
+compresses a
+.Va PuzzleCvec
+object into a
+.Va PuzzleCompressedCvec
+object.
+.Pp
+And
+.Fn puzzle_uncompress_cvec
+uncompresses a
+.Va PuzzleCompressedCvec
+object into a
+.Va PuzzleCvec
+object.
+.Bd \-literal \-offset indent
+PuzzleContext context;
+PuzzleCvec cvec;
+PuzzleCompressedCvec c_cvec;
+ ...
+puzzle_init_compressed_cvec(&context, &c_cvec);
+puzzle_compress_cvec(&context, &c_cvec, &cvec);
+ ...
+puzzle_free_compressed_cvec(&context, &c_cvec);
+.Ed
+The
+.Va PuzzleCompressedCvec
+structure has two important fields:
+.Va vec
+that is a pointer to the first element of the compressed data, and
+.Va sizeof_compressed_vec
+that contains the number of elements.
+.Sh RETURN VALUE
+Functions return
+.Em 0
+on success, and
+.Em \-1
+if something went wrong.
+.Sh AUTHORS
+.Nf
+Frank DENIS
+libpuzzle at pureftpd dot org
+.Fi
+.Sh ACKNOWLEDGMENTS
+.Nf
+Xerox Research Center
+H. CHI WONG
+Marschall BERN
+David GOLDBERG
+Sameh SCHAFIK
+.Fi
+.Sh SEE ALSO
+.Xr puzzle_set 3
+.Xr puzzle\-diff 8
diff --git a/deduper/libpuzzle/man/puzzle-diff.8 b/deduper/libpuzzle/man/puzzle-diff.8
new file mode 100644
index 0000000..5744b5a
--- /dev/null
+++ b/deduper/libpuzzle/man/puzzle-diff.8
@@ -0,0 +1,58 @@
+.\"
+.\" Copyright (c) 2007-2014 Frank DENIS <j at pureftpd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: September 23 2007 $
+.Dt PUZZLE-DIFF 8
+.Os
+.Sh NAME
+.Nm puzzle\-diff
+.Nd compare pictures with libpuzzle
+.Sh SYNOPSIS
+.Nm puzzle\-diff
+[\-b <contrast barrier for cropping>] [\-c] [\-C <max cropping ratio>]
+[\-e] [\-E <similarity threshold>] [\-h] [\-H <max height>] [\-l <lambdas>]
+[\-n <noise cutoff>] [\-p <p ratio>] [\-t] [\-W <max width>]
+<file 1>
+<file 2>
+.Sh DESCRIPTION
+puzzle\-diff compares two pictures and outputs the normalized distance.
+.Pp
+Try
+.Em puzzle\-diff \-h
+for more info.
+.Sh EXAMPLES
+Output distance between two images:
+.Bd -literal -offset indent
+$ puzzle\-diff pic\-a\-0.jpg pic\-a\-1.jpg
+0.102286
+.Ed
+.Pp
+Compare two images, exit with 10 if they look the same, exit with 20 if
+they don't (may be useful for scripts):
+.Bd -literal -offset indent
+$ puzzle\-diff \-e pic\-a\-0.jpg pic\-a\-1.jpg
+$ echo $?
+10
+.Ed
+.Pp
+Compute distance, without cropping and with computing the average intensity
+of the whole blocks:
+.Bd -literal -offset indent
+$ puzzle\-diff \-p 1.0 \-c pic\-a\-0.jpg pic\-a\-1.jpg
+0.0523151
+.Ed
+.Sh SEE ALSO
+.Xr libpuzzle 3
+.Xr puzzle_set 3
diff --git a/deduper/libpuzzle/man/puzzle_set.3 b/deduper/libpuzzle/man/puzzle_set.3
new file mode 100644
index 0000000..a8d017b
--- /dev/null
+++ b/deduper/libpuzzle/man/puzzle_set.3
@@ -0,0 +1,129 @@
+.\"
+.\" Copyright (c) 2007-2014 Frank DENIS <j at pureftpd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: September 24 2007 $
+.Dt PUZZLE_SET 3
+.Sh NAME
+.Nm puzzle_set_max_width ,
+.Nm puzzle_set_max_height ,
+.Nm puzzle_set_lambdas ,
+.Nm puzzle_set_p_ratio ,
+.Nm puzzle_set_noise_cutoff ,
+.Nm puzzle_set_contrast_barrier_for_cropping ,
+.Nm puzzle_set_max_cropping_ratio ,
+.Nm puzzle_set_autocrop
+.Nd set tunables for libpuzzle functions.
+.Sh SYNOPSIS
+.Fd #include <puzzle.h>
+.Ft int
+.Fn puzzle_set_max_width "PuzzleContext *context" "unsigned int width"
+.Ft int
+.Fn puzzle_set_max_height "PuzzleContext *context" "unsigned int height"
+.Ft int
+.Fn puzzle_set_lambdas "PuzzleContext *context" "unsigned int lambdas"
+.Ft int
+.Fn puzzle_set_p_ratio "PuzzleContext *context" "double p_ratio"
+.Ft int
+.Fn puzzle_set_noise_cutoff "PuzzleContext *context" "double noise_cutoff"
+.Ft int
+.Fn puzzle_set_contrast_barrier_for_cropping "PuzzleContext *context" "double barrier"
+.Ft int
+.Fn puzzle_set_max_cropping_ratio "PuzzleContext *context" "double ratio"
+.Ft int
+.Fn puzzle_set_autocrop "PuzzleContext *context" "int enable"
+.Sh DESCRIPTION
+While default values have been chosen to be ok for most people, the
+.Fn puzzle_set_*
+functions are knobs to fit the algorithm to your set of data and to your
+applications.
+.Sh LAMBDAS
+By default, pictures are divided in 9 x 9 blocks.
+.Pp
+.Em 9
+is the
+.Em lambdas
+value, and it can be changed with
+.Fn puzzle_set_lambdas
+.Pp
+For large databases, for complex images, for images with a lot of text or
+for sets of near\(hysimilar images, it might be better to raise that value to
+.Em 11
+or even
+.Em 13
+.Pp
+However, raising that value obviously means that vectors will require more
+storage space.
+.Pp
+The
+.Em lambdas
+value should remain the same in order to get comparable vectors. So if you
+pick
+.Em 11
+(for instance), you should always use that value for all pictures you will
+compute a digest for.
+.Fn puzzle_set_p_ratio
+.Pp
+The average intensity of each block is based upon a small centered zone.
+.Pp
+The "p ratio" determines the size of that zone. The default is 2.0, and that
+ratio mimics the behavior that is described in the reference algorithm.
+.Pp
+For very specific cases (complex images) or if you get too many false
+positives, as an alternative to increasing lambdas, you can try to lower that
+value, for instance to 1.5.
+.Pp
+The lowest acceptable value is 1.0.
+.Sh MAXIMUM SIZES
+In order to avoid CPU starvation, pictures won't be processed if their width
+or height is larger than 3000 pixels.
+.Pp
+These limits are rather large, but if you ever need to change them, the
+.Fn puzzle_set_max_width
+and
+.Fn puzzle_set_max_height
+are available.
+.Sh NOISE CUTOFF
+The noise cutoff defaults to 2. If you raise that value, more zones with
+little difference of intensity will be considered as similar.
+.Pp
+Unless you have very specialized sets of pictures, you probably don't want
+to change this.
+.Sh AUTOCROP
+By default, featureless borders of the original image are ignored. The size
+of each border depends on the sum of absolute values of differences between
+adjacent pixels, relative to the total sum.
+.Pp
+That feature can be disabled with
+.Fn puzzle_set_autocrop "0"
+Any other value will enable it.
+.Pp
+.Fn puzzle_set_contrast_barrier_for_cropping
+changes the tolerance. The default value is 5. Less shaves less, more shaves
+more.
+.Pp
+.Fn puzzle_set_max_cropping_ratio
+This is a safeguard against unwanted excessive auto\(hycropping.
+.Pp
+The default (0.25) means that no more than 25% of the total width (or
+height) will ever be shaved.
+.Sh RETURN VALUE
+Functions return
+.Em 0
+on success, and
+.Em \-1
+if something went wrong.
+.Sh SEE ALSO
+.Xr libpuzzle 3
+.Xr puzzle\-diff 8
diff --git a/deduper/libpuzzle/php/Makefile.am b/deduper/libpuzzle/php/Makefile.am
new file mode 100644
index 0000000..dc0165f
--- /dev/null
+++ b/deduper/libpuzzle/php/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = \
+ libpuzzle \
+ examples
diff --git a/deduper/libpuzzle/php/examples/Makefile.am b/deduper/libpuzzle/php/examples/Makefile.am
new file mode 100644
index 0000000..82c81ba
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = \
+ similar
diff --git a/deduper/libpuzzle/php/examples/similar/Makefile.am b/deduper/libpuzzle/php/examples/similar/Makefile.am
new file mode 100644
index 0000000..126f6df
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = \
+ schema.sqlite3.sql \
+ schema.pgsql.sql \
+ similar.php \
+ similar.inc.php \
+ config.inc.php
diff --git a/deduper/libpuzzle/php/examples/similar/config.inc.php b/deduper/libpuzzle/php/examples/similar/config.inc.php
new file mode 100644
index 0000000..d4e3b41
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/config.inc.php
@@ -0,0 +1,9 @@
+<?php
+
+// Upper bound, in bytes, on the size of a downloaded image; record_url()
+// in similar.php compares the number of bytes written against it.
+define('MAX_IMAGE_SIZE', 1024 * 1024 * 4);
+// Longest submitted URL, as measured by strlen(), that similar.php accepts.
+define('MAX_URL_SIZE', 255);
+// Data source name handed to the PDO constructor in similar.inc.php.
+define('DB_DSN', 'sqlite:similar.sqlite3');
+// Loop bound used by split_into_words() for the number of words per signature.
+define('MAX_WORDS', 100);
+// Length argument split_into_words() passes to substr() for each word.
+define('MAX_WORD_LENGTH', 10);
+
+?>
diff --git a/deduper/libpuzzle/php/examples/similar/schema.pgsql.sql b/deduper/libpuzzle/php/examples/similar/schema.pgsql.sql
new file mode 100644
index 0000000..7dc6bc1
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/schema.pgsql.sql
@@ -0,0 +1,230 @@
+--
+-- PostgreSQL database dump
+--
+
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = off;
+SET check_function_bodies = false;
+SET client_min_messages = warning;
+SET escape_string_warning = off;
+
+SET SESSION AUTHORIZATION 'similar';
+
+--
+-- Name: SCHEMA public; Type: COMMENT; Schema: -; Owner: similar
+--
+
+COMMENT ON SCHEMA public IS 'Standard public schema';
+
+
+SET search_path = public, pg_catalog;
+
+SET default_tablespace = '';
+
+SET default_with_oids = false;
+
+--
+-- Name: pictures; Type: TABLE; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE TABLE pictures (
+ id integer NOT NULL,
+ digest character(32) NOT NULL,
+ CONSTRAINT ck_digest CHECK ((char_length(digest) = 32))
+);
+
+
+--
+-- Name: pictures_id_seq; Type: SEQUENCE; Schema: public; Owner: similar
+--
+
+CREATE SEQUENCE pictures_id_seq
+ START WITH 1
+ INCREMENT BY 1
+ NO MAXVALUE
+ NO MINVALUE
+ CACHE 1;
+
+
+--
+-- Name: pictures_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: similar
+--
+
+ALTER SEQUENCE pictures_id_seq OWNED BY pictures.id;
+
+
+--
+-- Name: sentpictures; Type: TABLE; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE TABLE sentpictures (
+ id integer NOT NULL,
+ url character varying(255) NOT NULL,
+ sender character varying(100) NOT NULL,
+ picture_id integer NOT NULL,
+ CONSTRAINT ck_url CHECK (((url)::text <> ''::text))
+);
+
+
+--
+-- Name: sentpictures_id_seq; Type: SEQUENCE; Schema: public; Owner: similar
+--
+
+CREATE SEQUENCE sentpictures_id_seq
+ START WITH 1
+ INCREMENT BY 1
+ NO MAXVALUE
+ NO MINVALUE
+ CACHE 1;
+
+
+--
+-- Name: sentpictures_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: similar
+--
+
+ALTER SEQUENCE sentpictures_id_seq OWNED BY sentpictures.id;
+
+
+--
+-- Name: signatures; Type: TABLE; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE TABLE signatures (
+ id integer NOT NULL,
+ compressed_signature bytea NOT NULL,
+ picture_id integer NOT NULL,
+ CONSTRAINT ck_signature CHECK ((octet_length(compressed_signature) >= 182))
+);
+
+
+--
+-- Name: signatures_id_seq; Type: SEQUENCE; Schema: public; Owner: similar
+--
+
+CREATE SEQUENCE signatures_id_seq
+ START WITH 1
+ INCREMENT BY 1
+ NO MAXVALUE
+ NO MINVALUE
+ CACHE 1;
+
+
+--
+-- Name: signatures_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: similar
+--
+
+ALTER SEQUENCE signatures_id_seq OWNED BY signatures.id;
+
+
+--
+-- Name: words; Type: TABLE; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE TABLE words (
+ pos_and_word bytea NOT NULL,
+ signature_id integer NOT NULL,
+ CONSTRAINT ck_pos_and_word CHECK ((octet_length(pos_and_word) >= 2))
+);
+
+
+--
+-- Name: id; Type: DEFAULT; Schema: public; Owner: similar
+--
+
+ALTER TABLE pictures ALTER COLUMN id SET DEFAULT nextval('pictures_id_seq'::regclass);
+
+
+--
+-- Name: id; Type: DEFAULT; Schema: public; Owner: similar
+--
+
+ALTER TABLE sentpictures ALTER COLUMN id SET DEFAULT nextval('sentpictures_id_seq'::regclass);
+
+
+--
+-- Name: id; Type: DEFAULT; Schema: public; Owner: similar
+--
+
+ALTER TABLE signatures ALTER COLUMN id SET DEFAULT nextval('signatures_id_seq'::regclass);
+
+
+--
+-- Name: pictures_pkey; Type: CONSTRAINT; Schema: public; Owner: similar; Tablespace:
+--
+
+ALTER TABLE ONLY pictures
+ ADD CONSTRAINT pictures_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: sentpictures_pkey; Type: CONSTRAINT; Schema: public; Owner: similar; Tablespace:
+--
+
+ALTER TABLE ONLY sentpictures
+ ADD CONSTRAINT sentpictures_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: signatures_pkey; Type: CONSTRAINT; Schema: public; Owner: similar; Tablespace:
+--
+
+ALTER TABLE ONLY signatures
+ ADD CONSTRAINT signatures_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: idx_digest; Type: INDEX; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE UNIQUE INDEX idx_digest ON pictures USING btree (digest);
+
+
+--
+-- Name: idx_picture_id; Type: INDEX; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE INDEX idx_picture_id ON sentpictures USING btree (picture_id);
+
+
+--
+-- Name: idx_pos_and_word; Type: INDEX; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE INDEX idx_pos_and_word ON words USING btree (pos_and_word);
+
+
+--
+-- Name: idx_url; Type: INDEX; Schema: public; Owner: similar; Tablespace:
+--
+
+CREATE UNIQUE INDEX idx_url ON sentpictures USING btree (url);
+
+
+--
+-- Name: sentpictures_picture_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: similar
+--
+
+ALTER TABLE ONLY sentpictures
+ ADD CONSTRAINT sentpictures_picture_id_fkey FOREIGN KEY (picture_id) REFERENCES pictures(id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+
+--
+-- Name: signatures_picture_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: similar
+--
+
+ALTER TABLE ONLY signatures
+ ADD CONSTRAINT signatures_picture_id_fkey FOREIGN KEY (picture_id) REFERENCES pictures(id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+
+--
+-- Name: words_signature_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: similar
+--
+
+ALTER TABLE ONLY words
+ ADD CONSTRAINT words_signature_id_fkey FOREIGN KEY (signature_id) REFERENCES signatures(id) ON UPDATE CASCADE ON DELETE CASCADE;
+
+
+--
+-- PostgreSQL database dump complete
+--
+
diff --git a/deduper/libpuzzle/php/examples/similar/schema.sqlite3.sql b/deduper/libpuzzle/php/examples/similar/schema.sqlite3.sql
new file mode 100644
index 0000000..dc5a6c3
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/schema.sqlite3.sql
@@ -0,0 +1,23 @@
+CREATE TABLE pictures (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ digest CHAR(32) NOT NULL
+);
+CREATE TABLE sentpictures (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ url VARCHAR(255) NOT NULL,
+ sender VARCHAR(100) NOT NULL,
+ picture_id INTEGER NOT NULL
+);
+CREATE TABLE signatures (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ compressed_signature CHAR(182) NOT NULL,
+ picture_id INTEGER NOT NULL
+);
+CREATE TABLE words (
+ pos_and_word CHAR(5) NOT NULL,
+ signature_id INTEGER NOT NULL
+);
+CREATE UNIQUE INDEX idx_digest ON pictures(digest);
+CREATE INDEX idx_picture_id ON sentpictures (picture_id);
+CREATE INDEX idx_pos_and_word ON words(pos_and_word);
+CREATE UNIQUE INDEX idx_url ON sentpictures (url);
diff --git a/deduper/libpuzzle/php/examples/similar/similar.inc.php b/deduper/libpuzzle/php/examples/similar/similar.inc.php
new file mode 100644
index 0000000..cfc806e
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/similar.inc.php
@@ -0,0 +1,120 @@
+<?php
+
+/**
+ * Slice a signature into overlapping, fixed-length words.
+ *
+ * The word stored at index $i is substr($sig, $i, MAX_WORD_LENGTH), so
+ * consecutive words start one position apart. The word at offset 0 is
+ * always produced; further words are added while the offset stays below
+ * MAX_WORDS.
+ *
+ * @param string $sig signature to slice
+ * @return array words, indexed by their starting offset in $sig
+ */
+function split_into_words($sig) {
+    $words = array(substr($sig, 0, MAX_WORD_LENGTH));
+    for ($offset = 1; $offset < MAX_WORDS; $offset++) {
+        $words[$offset] = substr($sig, $offset, MAX_WORD_LENGTH);
+    }
+
+    return $words;
+}
+
+/**
+ * Record a submitted picture and, for new pictures, index its signature.
+ *
+ * Any existing sentpictures row for $url is deleted first. If a pictures row
+ * with the same digest already exists, the picture is treated as a
+ * duplicate: only the sentpictures row is written and the signature is not
+ * indexed again. Otherwise a pictures row, a signatures row and one words
+ * row per word of the signature are inserted. Everything runs in a single
+ * transaction.
+ *
+ * @param string $url         URL the picture was submitted under
+ * @param string $client_info value stored in the sentpictures.sender column
+ * @param string $md5         digest stored in / looked up from pictures.digest
+ * @param mixed  $cvec        signature, as accepted by puzzle_compress_cvec()
+ * @return bool TRUE once the transaction has been committed, FALSE if it
+ *              failed and was rolled back
+ */
+function save_signature($url, $client_info, $md5, $cvec) {
+    $compressed_cvec = puzzle_compress_cvec($cvec);
+    $words = split_into_words($cvec);
+    $dbh = new PDO(DB_DSN);
+    // Make PDO throw on failure. Before PHP 8 the default error mode is
+    // silent, in which case the catch block below would never run.
+    $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
+    $dbh->beginTransaction();
+    try {
+        $st = $dbh->prepare
+            ('DELETE FROM sentpictures WHERE url = :url');
+        $st->execute(array(':url' => $url));
+        $st = $dbh->prepare
+            ('SELECT id FROM pictures WHERE digest = :digest');
+        $st->execute(array(':digest' => $md5));
+        $picture_id = $st->fetchColumn();
+        $st->closeCursor();
+        $duplicate = TRUE;
+        if ($picture_id === FALSE) {
+            // Unknown digest: this is the first time the picture is seen.
+            $duplicate = FALSE;
+            $st = $dbh->prepare
+                ('INSERT INTO pictures (digest) VALUES (:digest)');
+            $st->execute(array(':digest' => $md5));
+            $picture_id = $dbh->lastInsertId('id');
+        }
+        $st = $dbh->prepare
+            ('INSERT INTO sentpictures (url, sender, picture_id) ' .
+             'VALUES (:url, :sender, :picture_id)');
+        $st->execute(array(':url' => $url, ':sender' => $client_info,
+                           ':picture_id' => $picture_id));
+        if ($duplicate === TRUE) {
+            // The signature of this picture has already been indexed.
+            $dbh->commit();
+            return TRUE;
+        }
+        $st = $dbh->prepare
+            ('INSERT INTO signatures (compressed_signature, picture_id) ' .
+             'VALUES(:compressed_signature, :picture_id)');
+        $st->execute(array(':compressed_signature' => $compressed_cvec,
+                           ':picture_id' => $picture_id));
+        $signature_id = $dbh->lastInsertId('id');
+        $st = $dbh->prepare
+            ('INSERT INTO words (pos_and_word, signature_id) ' .
+             'VALUES (:pos_and_word, :signature_id)');
+        foreach ($words as $u => $word) {
+            // Each stored word is prefixed with its position, as one byte.
+            $st->execute(array('pos_and_word'
+                               => chr($u) . puzzle_compress_cvec($word),
+                               'signature_id' => $signature_id));
+        }
+        $dbh->commit();
+    } catch (Exception $e) {
+        var_dump($e);
+        $dbh->rollback();
+        // Nothing was stored: report the failure instead of claiming success.
+        return FALSE;
+    }
+    return TRUE;
+}
+
+/**
+ * Look up the URLs of stored pictures whose signature is close to $cvec.
+ *
+ * Candidate signatures are those sharing at least one (position, word) pair
+ * with $cvec. Each candidate is uncompressed and compared with
+ * puzzle_vector_normalized_distance(); it is kept when its distance is
+ * strictly between 0.0 and $threshold.
+ *
+ * @param string $md5       not used by this function
+ * @param mixed  $cvec      signature to search for
+ * @param float  $threshold exclusive upper bound on the accepted distance
+ * @return array URLs from sentpictures for every picture that was kept
+ */
+function find_similar_pictures($md5, $cvec,
+                               $threshold = PUZZLE_CVEC_SIMILARITY_THRESHOLD) {
+    $compressed_cvec = puzzle_compress_cvec($cvec);
+    $words = split_into_words($cvec);
+    $dbh = new PDO(DB_DSN);
+    $dbh->beginTransaction();
+
+    // One quoted literal per (position byte + compressed word).
+    $quoted_words = array();
+    foreach ($words as $position => $word) {
+        $quoted_words[] = $dbh->quote(chr($position) . puzzle_compress_cvec($word));
+    }
+    $candidates = $dbh->query(
+        'SELECT DISTINCT(signature_id) AS signature_id FROM words ' .
+        'WHERE pos_and_word IN (' . implode(',', $quoted_words) . ')');
+
+    $lookup = $dbh->prepare('SELECT compressed_signature, picture_id ' .
+                            'FROM signatures WHERE id = :id');
+    $scores = array();
+    while (($signature_id = $candidates->fetchColumn()) !== FALSE) {
+        $lookup->execute(array(':id' => $signature_id));
+        $row = $lookup->fetch();
+        $candidate_cvec = puzzle_uncompress_cvec($row['compressed_signature']);
+        $distance = puzzle_vector_normalized_distance($cvec, $candidate_cvec);
+        if ($distance < $threshold && $distance > 0.0) {
+            $scores[$row['picture_id']] = $distance;
+        }
+    }
+
+    $urls = array();
+    if (!empty($scores)) {
+        $quoted_ids = array();
+        foreach (array_keys($scores) as $picture_id) {
+            $quoted_ids[] = $dbh->quote($picture_id);
+        }
+        $matches = $dbh->query(
+            'SELECT url FROM sentpictures WHERE picture_id IN (' .
+            implode(',', $quoted_ids) . ')');
+        while (($url = $matches->fetchColumn()) !== FALSE) {
+            $urls[] = $url;
+        }
+    }
+    $dbh->commit();
+
+    return $urls;
+}
+
+?>
diff --git a/deduper/libpuzzle/php/examples/similar/similar.php b/deduper/libpuzzle/php/examples/similar/similar.php
new file mode 100644
index 0000000..4b3ad40
--- /dev/null
+++ b/deduper/libpuzzle/php/examples/similar/similar.php
@@ -0,0 +1,158 @@
+<html><!-- sample image search engine, part of the libpuzzle package -->
+<head>
+</head>
+<body>
+<h1>Similar images finder using <a href="http://libpuzzle.pureftpd.org">libpuzzle</a></h1>
+<?php
+
+error_reporting(E_ALL);
+
+require_once 'config.inc.php';
+require_once 'similar.inc.php';
+
+/**
+ * Print the URL submission form. The form posts back to the current
+ * request URI, which is HTML-escaped before being written out.
+ */
+function display_form() {
+    $action = htmlspecialchars($_SERVER['REQUEST_URI']);
+    echo '<form action="', $action, '" method="POST">', "\n",
+         'Enter an image URL (http only):', "\n",
+         '<input type="text" size="100" value="" autocomplete="off" name="url" />', "\n",
+         '<input type="submit" />',
+         '</form>', "\n";
+}
+
+/**
+ * Print an error message inside the "err" block.
+ *
+ * @param string $err message text; it is HTML-escaped before output
+ */
+function display_error($err) {
+    $message = htmlspecialchars($err);
+    echo '<div id="err"><strong>', $message, '</strong></div>', "\n";
+}
+
+/**
+ * Print the "Loading..." progress marker, then flush PHP's output buffer
+ * (errors from ob_flush() are suppressed) and the system write buffers.
+ */
+function display_loading() {
+    echo '<div id="loading">Loading...</div>', "\n";
+    @ob_flush();
+    flush();
+}
+
+/**
+ * Print the "Loaded." progress marker, then flush PHP's output buffer
+ * (errors from ob_flush() are suppressed) and the system write buffers.
+ */
+function display_loaded() {
+    echo '<div id="loaded">Loaded.</div>', "\n";
+    @ob_flush();
+    flush();
+}
+
+/**
+ * Print the "Signature computed." progress marker, then flush PHP's output
+ * buffer (errors from ob_flush() are suppressed) and the system write
+ * buffers.
+ */
+function display_signature_ok() {
+    echo '<div id="sig-ok">Signature computed.</div>', "\n";
+    @ob_flush();
+    flush();
+}
+
+/**
+ * Delete $file, suppressing any error raised by unlink().
+ * record_url() registers this as a shutdown function for its temporary
+ * download file.
+ *
+ * @param string $file path of the file to delete
+ */
+function remove_tmpfile($file) {
+ @unlink($file);
+}
+
+/**
+ * Build the sender tag for a submission: the client address from
+ * $_SERVER['REMOTE_ADDR'] (tolerated when missing), a slash, and the
+ * current Unix timestamp.
+ *
+ * @return string "<remote address>/<timestamp>"
+ */
+function get_client_info() {
+    $remote_addr = @$_SERVER['REMOTE_ADDR'];
+
+    return $remote_addr . '/' . time();
+}
+
+/**
+ * Print the "images" block: one linked thumbnail per URL. Each URL is
+ * escaped with htmlentities() before being used in the href and src
+ * attributes.
+ *
+ * @param array $urls picture URLs to display
+ */
+function display_similar_pictures($urls) {
+    echo '<div id="images">', "\n";
+    foreach ($urls as $url) {
+        $safe_url = htmlentities($url);
+        echo '<a href="', $safe_url, '" ',
+             'onclick="window.open(this.href); return false;">',
+             ' <img src="', $safe_url, '" alt="" />',
+             '</a>', "\n";
+    }
+    echo '</div>', "\n";
+}
+
+/**
+ * Download the picture at $url, compute its MD5 and signature, and store
+ * them with save_signature().
+ *
+ * The picture is copied to a temporary file that is removed at shutdown.
+ * The first chunk is checked with getimagesize(): only GIF, JPEG and PNG
+ * pictures of at least 50x50 pixels are accepted. Downloads larger than
+ * MAX_IMAGE_SIZE bytes are rejected. On any failure the form and an error
+ * message are printed.
+ *
+ * @param string $url  URL of the picture to fetch
+ * @param mixed  $md5  out: MD5 of the downloaded file
+ * @param mixed  $cvec out: signature computed by puzzle_fill_cvec_from_file()
+ * @return bool TRUE when the picture was processed, FALSE on failure
+ */
+function record_url($url, &$md5, &$cvec) {
+    if (function_exists('sys_get_temp_dir')) {
+        $tmpdir = sys_get_temp_dir();
+    } else {
+        $tmpdir = '/tmp';
+    }
+    $dfn = tempnam($tmpdir, 'similar-' . md5(uniqid(mt_rand(), TRUE)));
+    register_shutdown_function('remove_tmpfile', $dfn);
+    if (($dfp = fopen($dfn, 'w')) == FALSE) {
+        display_form();
+        display_error('Unable to create the temporary file');
+        return FALSE;
+    }
+    if (($fp = fopen($url, 'r')) == FALSE) {
+        fclose($dfp);
+        display_form();
+        display_error('Unable to open: [' . $url . ']');
+        return FALSE;
+    }
+    $f = fread($fp, 4096);
+    $written = strlen($f);
+    if (empty($f)) {
+        fclose($fp);
+        fclose($dfp);
+        display_form();
+        display_error('Unable to load: [' . $url . ']');
+        return FALSE;
+    }
+    fwrite($dfp, $f);
+    // Validate the picture type and dimensions from the first chunk, before
+    // downloading the rest.
+    $infos = @getimagesize($dfn);
+    if (empty($infos) ||
+        ($infos[2] !== IMAGETYPE_GIF && $infos[2] !== IMAGETYPE_JPEG &&
+         $infos[2] !== IMAGETYPE_PNG) ||
+        $infos[0] < 50 || $infos[1] < 50) {
+        fclose($fp);
+        fclose($dfp);
+        display_form();
+        display_error('Unsupported image format');
+        return FALSE;
+    }
+    fseek($dfp, strlen($f));
+    while (!feof($fp)) {
+        // Ask for one byte more than the remaining allowance: the length
+        // passed to fread() then never drops to 0, and an oversized download
+        // pushes $written past MAX_IMAGE_SIZE so the check below can fire.
+        $max = MAX_IMAGE_SIZE - $written + 1;
+        if ($max > 65536) {
+            $max = 65536;
+        }
+        $t = fread($fp, $max);
+        if ($t === FALSE) {
+            // A failed read would otherwise be retried forever.
+            fclose($fp);
+            fclose($dfp);
+            display_form();
+            display_error('Unable to load: [' . $url . ']');
+            return FALSE;
+        }
+        fwrite($dfp, $t);
+        $written += strlen($t);
+        if ($written > MAX_IMAGE_SIZE) {
+            fclose($fp);
+            fclose($dfp);
+            display_form();
+            display_error('File too large');
+            return FALSE;
+        }
+    }
+    unset($t);
+    fclose($fp);
+    fclose($dfp);
+    display_loaded();
+    $md5 = @md5_file($dfn);
+    if (empty($md5)) {
+        display_form();
+        display_error('Unable to get the MD5 of the file');
+        return FALSE;
+    }
+    $cvec = puzzle_fill_cvec_from_file($dfn);
+    if (empty($cvec)) {
+        display_form();
+        display_error('Unable to compute image signature');
+        return FALSE;
+    }
+    display_signature_ok();
+    save_signature($url, get_client_info(), $md5, $cvec);
+
+    return TRUE;
+}
+
+// Request handling. Without a submitted URL only the form is shown.
+// Otherwise the URL is validated, the picture is fetched and recorded, and
+// the pictures found to be similar are displayed below the form.
+$url = trim(@$_POST['url']);
+if (empty($url)) {
+    display_form();
+    exit(0);
+}
+// The pattern accepts only http:// URLs with a dotted host name followed by
+// a non-empty path. '~' is the delimiter because it is plain ASCII: the
+// pattern then no longer depends on the encoding of this source file.
+if (strlen($url) > MAX_URL_SIZE ||
+    preg_match('~^http://([a-z0-9-]+[.])+[a-z]{2,}/.~i', $url) <= 0) {
+    display_form();
+    display_error('Invalid URL, must be http://...');
+    exit(1);
+}
+display_loading();
+$md5 = FALSE;
+$cvec = FALSE;
+// record_url() fills $md5 and $cvec and prints its own error output.
+if (record_url($url, $md5, $cvec) !== TRUE) {
+    exit(1);
+}
+$urls = find_similar_pictures($md5, $cvec);
+unset($cvec);
+display_form();
+display_similar_pictures($urls);
+
+?>
+</body>
+</html>
diff --git a/deduper/libpuzzle/php/libpuzzle/CREDITS b/deduper/libpuzzle/php/libpuzzle/CREDITS
new file mode 100644
index 0000000..bb6ecb3
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/CREDITS
@@ -0,0 +1 @@
+Frank DENIS <j at pureftpd.org>
diff --git a/deduper/libpuzzle/php/libpuzzle/EXPERIMENTAL b/deduper/libpuzzle/php/libpuzzle/EXPERIMENTAL
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/EXPERIMENTAL
diff --git a/deduper/libpuzzle/php/libpuzzle/LICENSE b/deduper/libpuzzle/php/libpuzzle/LICENSE
new file mode 100644
index 0000000..1ce2d05
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/LICENSE
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2007-2015 Frank DENIS <j at pureftpd.org>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
diff --git a/deduper/libpuzzle/php/libpuzzle/Makefile.am b/deduper/libpuzzle/php/libpuzzle/Makefile.am
new file mode 100644
index 0000000..f582035
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/Makefile.am
@@ -0,0 +1,15 @@
+EXTRA_DIST = \
+ CREDITS \
+ EXPERIMENTAL \
+ LICENSE \
+ README \
+ config.m4 \
+ libpuzzle.c \
+ libpuzzle.php \
+ php_libpuzzle.h
+
+SUBDIRS = \
+ build \
+ include \
+ modules \
+ tests
diff --git a/deduper/libpuzzle/php/libpuzzle/README b/deduper/libpuzzle/php/libpuzzle/README
new file mode 100644
index 0000000..7bb674f
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/README
@@ -0,0 +1,4 @@
+This is a PHP extension for libpuzzle.
+
+Have a look at the README-PHP file on top of the libpuzzle distribution for
+more info about that extension.
diff --git a/deduper/libpuzzle/php/libpuzzle/build/Makefile.am b/deduper/libpuzzle/php/libpuzzle/build/Makefile.am
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/build/Makefile.am
diff --git a/deduper/libpuzzle/php/libpuzzle/config.m4 b/deduper/libpuzzle/php/libpuzzle/config.m4
new file mode 100644
index 0000000..84f954a
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/config.m4
@@ -0,0 +1,49 @@
+dnl config.m4 for extension libpuzzle
+
+dnl If your extension references something external, use with:
+
+PHP_ARG_WITH(libpuzzle, for libpuzzle support,
+ [ --with-libpuzzle Include libpuzzle support])
+
+if test "$PHP_LIBPUZZLE" != "no"; then
+  dnl Look for gdlib-config in the given prefix first, then in the defaults.
+  GDLIB_CONFIG=""
+  for i in $PHP_LIBPUZZLE /usr/local /usr; do
+    if test -x "$i/bin/gdlib-config"; then
+      GDLIB_CONFIG=$i/bin/gdlib-config
+      break
+    fi
+  done
+  if test -n "$GDLIB_CONFIG"; then
+    GDLIB_LIBS=$($GDLIB_CONFIG --ldflags --libs)
+    GDLIB_INCS=$($GDLIB_CONFIG --cflags)
+  else
+    dnl Do not try to execute an empty command name; keep the flags empty.
+    AC_MSG_WARN([gdlib-config not found, no extra GD flags will be used])
+    GDLIB_LIBS=""
+    GDLIB_INCS=""
+  fi
+
+  PHP_EVAL_LIBLINE($GDLIB_LIBS, LIBPUZZLE_SHARED_LIBADD)
+  PHP_EVAL_INCLINE($GDLIB_INCS)
+
+  SEARCH_PATH="/usr/local /usr"     # you might want to change this
+  SEARCH_FOR="/include/puzzle.h"    # you most likely want to change this
+  if test -r $PHP_LIBPUZZLE/$SEARCH_FOR; then # path given as parameter
+    LIBPUZZLE_DIR=$PHP_LIBPUZZLE
+  else # search default path list
+    AC_MSG_CHECKING([for libpuzzle files in default path])
+    for i in $SEARCH_PATH ; do
+      if test -r $i/$SEARCH_FOR; then
+        LIBPUZZLE_DIR=$i
+        AC_MSG_RESULT(found in $i)
+        dnl Stop at the first hit so that a single result is reported.
+        break
+      fi
+    done
+  fi
+
+  if test -z "$LIBPUZZLE_DIR"; then
+    AC_MSG_RESULT([not found])
+    AC_MSG_ERROR([Please reinstall the libpuzzle distribution])
+  fi
+
+  dnl # --with-libpuzzle -> add include path
+  PHP_ADD_INCLUDE($LIBPUZZLE_DIR/include)
+
+  PHP_ADD_LIBRARY_WITH_PATH(puzzle, $LIBPUZZLE_DIR/lib,
+    LIBPUZZLE_SHARED_LIBADD)
+
+  PHP_SUBST(LIBPUZZLE_SHARED_LIBADD)
+
+  PHP_NEW_EXTENSION(libpuzzle, libpuzzle.c, $ext_shared)
+fi
diff --git a/deduper/libpuzzle/php/libpuzzle/include/Makefile.am b/deduper/libpuzzle/php/libpuzzle/include/Makefile.am
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/include/Makefile.am
diff --git a/deduper/libpuzzle/php/libpuzzle/libpuzzle.c b/deduper/libpuzzle/php/libpuzzle/libpuzzle.c
new file mode 100644
index 0000000..82e84c3
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/libpuzzle.c
@@ -0,0 +1,410 @@
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "php.h"
+#include "php_ini.h"
+#include "ext/standard/info.h"
+#include <puzzle.h>
+#include "php_libpuzzle.h"
+
+ZEND_DECLARE_MODULE_GLOBALS(libpuzzle)
+
+/* True global resources - no need for thread safety here */
+static int le_libpuzzle;
+
+/* {{{ libpuzzle_functions[]
+ * Table of the PHP-visible functions exported by this extension. Every entry
+ * is registered with NULL as its argument information. The first group holds
+ * the context setters, the second group the signature functions.
+ */
+zend_function_entry libpuzzle_functions[] = {
+ PHP_FE(puzzle_set_max_width, NULL)
+ PHP_FE(puzzle_set_max_height, NULL)
+ PHP_FE(puzzle_set_lambdas, NULL)
+ PHP_FE(puzzle_set_noise_cutoff, NULL)
+ PHP_FE(puzzle_set_p_ratio, NULL)
+ PHP_FE(puzzle_set_contrast_barrier_for_cropping, NULL)
+ PHP_FE(puzzle_set_max_cropping_ratio, NULL)
+ PHP_FE(puzzle_set_autocrop, NULL)
+
+ PHP_FE(puzzle_fill_cvec_from_file, NULL)
+ PHP_FE(puzzle_compress_cvec, NULL)
+ PHP_FE(puzzle_uncompress_cvec, NULL)
+ PHP_FE(puzzle_vector_normalized_distance, NULL)
+
+ {NULL, NULL, NULL} /* Must be the last line in libpuzzle_functions[] */
+};
+/* }}} */
+
+/* {{{ libpuzzle_module_entry
+ * Module descriptor: name, function table, lifecycle hooks and version.
+ */
+zend_module_entry libpuzzle_module_entry = {
+#if ZEND_MODULE_API_NO >= 20010901
+ STANDARD_MODULE_HEADER,
+#endif
+ "libpuzzle",
+ libpuzzle_functions,
+ PHP_MINIT(libpuzzle),
+ PHP_MSHUTDOWN(libpuzzle),
+ PHP_RINIT(libpuzzle), /* initializes the global puzzle context at request start */
+ PHP_RSHUTDOWN(libpuzzle), /* frees the global puzzle context at request end */
+ PHP_MINFO(libpuzzle),
+#if ZEND_MODULE_API_NO >= 20010901
+ "0.10", /* extension version string */
+#endif
+ STANDARD_MODULE_PROPERTIES
+};
+/* }}} */
+
+#ifdef COMPILE_DL_LIBPUZZLE
+ZEND_GET_MODULE(libpuzzle)
+#endif
+
+
+/* {{{ PHP_MINIT_FUNCTION
+ * Module start-up: exposes the four libpuzzle similarity thresholds as
+ * case-sensitive, persistent PHP constants of type double.
+ */
+PHP_MINIT_FUNCTION(libpuzzle)
+{
+ REGISTER_DOUBLE_CONSTANT("PUZZLE_CVEC_SIMILARITY_THRESHOLD",
+ PUZZLE_CVEC_SIMILARITY_THRESHOLD,
+ CONST_CS | CONST_PERSISTENT);
+ REGISTER_DOUBLE_CONSTANT("PUZZLE_CVEC_SIMILARITY_HIGH_THRESHOLD",
+ PUZZLE_CVEC_SIMILARITY_HIGH_THRESHOLD,
+ CONST_CS | CONST_PERSISTENT);
+ REGISTER_DOUBLE_CONSTANT("PUZZLE_CVEC_SIMILARITY_LOW_THRESHOLD",
+ PUZZLE_CVEC_SIMILARITY_LOW_THRESHOLD,
+ CONST_CS | CONST_PERSISTENT);
+ REGISTER_DOUBLE_CONSTANT("PUZZLE_CVEC_SIMILARITY_LOWER_THRESHOLD",
+ PUZZLE_CVEC_SIMILARITY_LOWER_THRESHOLD,
+ CONST_CS | CONST_PERSISTENT);
+ return SUCCESS;
+}
+/* }}} */
+
+/* {{{ PHP_MSHUTDOWN_FUNCTION
+ * Module shutdown: nothing to release, always reports SUCCESS.
+ */
+PHP_MSHUTDOWN_FUNCTION(libpuzzle)
+{
+ return SUCCESS;
+}
+/* }}} */
+
+/* Request start-up hook. */
+/* {{{ PHP_RINIT_FUNCTION
+ * Initializes the module-global puzzle context used by every function of
+ * the extension during the request.
+ */
+PHP_RINIT_FUNCTION(libpuzzle)
+{
+ puzzle_init_context(&LIBPUZZLE_G(global_context));
+ return SUCCESS;
+}
+/* }}} */
+
+/* Request shutdown hook. */
+/* {{{ PHP_RSHUTDOWN_FUNCTION
+ * Releases the module-global puzzle context that PHP_RINIT initialized.
+ */
+PHP_RSHUTDOWN_FUNCTION(libpuzzle)
+{
+ puzzle_free_context(&LIBPUZZLE_G(global_context));
+ return SUCCESS;
+}
+/* }}} */
+
+/* {{{ PHP_MINFO_FUNCTION
+ * Prints a one-row table stating that libpuzzle support is enabled.
+ */
+PHP_MINFO_FUNCTION(libpuzzle)
+{
+ php_info_print_table_start();
+ php_info_print_table_header(2, "libpuzzle support", "enabled");
+ php_info_print_table_end();
+}
+/* }}} */
+
+/* {{{ proto string puzzle_fill_cvec_from_file(string filename)
+ * Computes the signature of an image file and returns it as a binary string.
+ * Returns FALSE on bad arguments, an empty file name or a library failure */
+PHP_FUNCTION(puzzle_fill_cvec_from_file)
+{
+    PuzzleContext *context;
+    PuzzleCvec cvec;
+    char *filename = NULL;
+    int filename_len;
+
+    context = &LIBPUZZLE_G(global_context);
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
+                              "s", &filename, &filename_len) == FAILURE) {
+        RETURN_FALSE;
+    }
+    if (filename_len <= 0) {
+        RETURN_FALSE;
+    }
+    puzzle_init_cvec(context, &cvec);
+    if (puzzle_fill_cvec_from_file(context, &cvec, filename) == 0) {
+        /* The string is duplicated, so the vector can be freed below. */
+        RETVAL_STRINGL(cvec.vec, cvec.sizeof_vec, 1);
+    } else {
+        RETVAL_FALSE;
+    }
+    puzzle_free_cvec(context, &cvec);
+}
+/* }}} */
+
+/* {{{ proto string puzzle_compress_cvec(string cvec)
+ * Returns the compressed form of a signature, or FALSE on bad arguments,
+ * an empty signature or a library failure */
+PHP_FUNCTION(puzzle_compress_cvec)
+{
+    PuzzleContext *context;
+    PuzzleCvec cvec;
+    PuzzleCompressedCvec compressed_cvec;
+    char *signature = NULL;
+    int signature_len;
+
+    context = &LIBPUZZLE_G(global_context);
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
+                              "s", &signature, &signature_len) == FAILURE) {
+        RETURN_FALSE;
+    }
+    if (signature_len <= 0) {
+        RETURN_FALSE;
+    }
+    puzzle_init_compressed_cvec(context, &compressed_cvec);
+    puzzle_init_cvec(context, &cvec);
+    /* The input vector only borrows the PHP string buffer. */
+    cvec.vec = signature;
+    cvec.sizeof_vec = (size_t) signature_len;
+    if (puzzle_compress_cvec(context, &compressed_cvec, &cvec) == 0) {
+        RETVAL_STRINGL(compressed_cvec.vec,
+                       compressed_cvec.sizeof_compressed_vec, 1);
+    } else {
+        RETVAL_FALSE;
+    }
+    puzzle_free_compressed_cvec(context, &compressed_cvec);
+    /* Detach the borrowed buffer so that it is not passed to free(). */
+    cvec.vec = NULL;
+    puzzle_free_cvec(context, &cvec);
+}
+/* }}} */
+
+/* {{{ proto string puzzle_uncompress_cvec(string compressed_cvec)
+ * Uncompress a compressed signature so that it can be used for computations.
+ * Returns FALSE on bad arguments, on input shorter than two bytes or on a
+ * library failure */
+PHP_FUNCTION(puzzle_uncompress_cvec)
+{
+    char *arg = NULL;
+    int arg_len;
+    PuzzleContext *context;
+    PuzzleCompressedCvec compressed_cvec;
+    PuzzleCvec cvec;
+
+    context = &LIBPUZZLE_G(global_context);
+    /* The library reads the first two bytes unconditionally and reports a
+     * shorter vector through puzzle_err_bug(); script-supplied data must
+     * never reach that path, so it is rejected here. */
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
+                              "s", &arg, &arg_len) == FAILURE ||
+        arg_len < 2) {
+        RETURN_FALSE;
+    }
+    puzzle_init_compressed_cvec(context, &compressed_cvec);
+    puzzle_init_cvec(context, &cvec);
+    /* The compressed vector only borrows the PHP string buffer. */
+    compressed_cvec.vec = arg;
+    compressed_cvec.sizeof_compressed_vec = (size_t) arg_len;
+    if (puzzle_uncompress_cvec(context, &compressed_cvec, &cvec) != 0) {
+        puzzle_free_cvec(context, &cvec);
+        compressed_cvec.vec = NULL;
+        puzzle_free_compressed_cvec(context, &compressed_cvec);
+        RETURN_FALSE;
+    }
+    RETVAL_STRINGL(cvec.vec, cvec.sizeof_vec, 1);
+    puzzle_free_cvec(context, &cvec);
+    /* Detach the borrowed buffer so that it is not passed to free(). */
+    compressed_cvec.vec = NULL;
+    puzzle_free_compressed_cvec(context, &compressed_cvec);
+}
+/* }}} */
+
+/* {{{ proto double puzzle_vector_normalized_distance(string cvec1, string cvec2 [, bool fix_for_texts])
+ * Computes the distance between two signatures. Result is between 0.0 and 1.0.
+ * fix_for_texts defaults to TRUE when omitted. Returns FALSE on bad arguments
+ * or when one of the signatures is empty */
+PHP_FUNCTION(puzzle_vector_normalized_distance)
+{
+    char *vec1 = NULL, *vec2 = NULL;
+    int vec1_len, vec2_len;
+    PuzzleContext *context;
+    PuzzleCvec cvec1, cvec2;
+    double d;
+    zend_bool fix_for_texts;
+
+    context = &LIBPUZZLE_G(global_context);
+    if (zend_parse_parameters
+        (ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
+         &vec1, &vec1_len, &vec2, &vec2_len, &fix_for_texts) == FAILURE ||
+        vec1_len <= 0 || vec2_len <= 0) {
+        RETURN_FALSE;
+    }
+    /* TSRMLS_CC must not appear here: in thread-safe builds it expands to
+     * ", tsrm_ls", which would turn the test into a comma expression. */
+    if (ZEND_NUM_ARGS() < 3) {
+        fix_for_texts = (zend_bool) 1;
+    }
+    puzzle_init_cvec(context, &cvec1);
+    puzzle_init_cvec(context, &cvec2);
+    /* Both vectors only borrow the PHP string buffers. */
+    cvec1.vec = vec1;
+    cvec1.sizeof_vec = (size_t) vec1_len;
+    cvec2.vec = vec2;
+    cvec2.sizeof_vec = (size_t) vec2_len;
+    d = puzzle_vector_normalized_distance(context, &cvec1, &cvec2,
+                                          (int) fix_for_texts);
+    /* Detach the borrowed buffers so that they are not passed to free(). */
+    cvec1.vec = cvec2.vec = NULL;
+    puzzle_free_cvec(context, &cvec1);
+    puzzle_free_cvec(context, &cvec2);
+    RETVAL_DOUBLE(d);
+}
+/* }}} */
+
+/* {{{ proto bool puzzle_set_max_width(int width)
+ * Sets the maximum picture width. TRUE on success, FALSE on a bad argument
+ * or when the library rejects the value */
+PHP_FUNCTION(puzzle_set_max_width)
+{
+    long width;
+    PuzzleContext *context = &LIBPUZZLE_G(global_context);
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
+                              "l", &width) == FAILURE) {
+        RETURN_FALSE;
+    }
+    /* Only strictly positive values that fit in an int are accepted. */
+    if (width <= 0L || width > INT_MAX) {
+        RETURN_FALSE;
+    }
+    if (puzzle_set_max_width(context, (unsigned int) width) == 0) {
+        RETVAL_TRUE;
+    } else {
+        RETVAL_FALSE;
+    }
+}
+/* }}} */
+
+/* {{{ proto bool puzzle_set_max_height(int height)
+ * Sets the maximum picture height. TRUE on success, FALSE on a bad argument
+ * or when the library rejects the value */
+PHP_FUNCTION(puzzle_set_max_height)
+{
+    long height;
+    PuzzleContext *context = &LIBPUZZLE_G(global_context);
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
+                              "l", &height) == FAILURE) {
+        RETURN_FALSE;
+    }
+    /* Only strictly positive values that fit in an int are accepted. */
+    if (height <= 0L || height > INT_MAX) {
+        RETURN_FALSE;
+    }
+    if (puzzle_set_max_height(context, (unsigned int) height) == 0) {
+        RETVAL_TRUE;
+    } else {
+        RETVAL_FALSE;
+    }
+}
+/* }}} */
+
+/* {{{ proto bool puzzle_set_lambdas(int lambdas)
+ * Sets the size of the computation grid. TRUE on success, FALSE on a bad
+ * argument or when the library rejects the value */
+PHP_FUNCTION(puzzle_set_lambdas)
+{
+    long lambdas;
+    PuzzleContext *context = &LIBPUZZLE_G(global_context);
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
+                              "l", &lambdas) == FAILURE) {
+        RETURN_FALSE;
+    }
+    /* Only strictly positive values that fit in an int are accepted. */
+    if (lambdas <= 0L || lambdas > INT_MAX) {
+        RETURN_FALSE;
+    }
+    if (puzzle_set_lambdas(context, (unsigned int) lambdas) == 0) {
+        RETVAL_TRUE;
+    } else {
+        RETVAL_FALSE;
+    }
+}
+/* }}} */
+
+/* {{{ proto bool puzzle_set_noise_cutoff(double cutoff)
+ * Sets the noise cutoff level. TRUE on success, FALSE on a bad argument or
+ * when the library rejects the value */
+PHP_FUNCTION(puzzle_set_noise_cutoff)
+{
+    double cutoff;
+    PuzzleContext *context = &LIBPUZZLE_G(global_context);
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
+                              "d", &cutoff) == FAILURE) {
+        RETURN_FALSE;
+    }
+    if (puzzle_set_noise_cutoff(context, cutoff) == 0) {
+        RETVAL_TRUE;
+    } else {
+        RETVAL_FALSE;
+    }
+}
+/* }}} */
+
+/* {{{ proto bool puzzle_set_p_ratio(double ratio)
+ * Sets the p_ratio. TRUE on success, FALSE on a bad argument or when the
+ * library rejects the value */
+PHP_FUNCTION(puzzle_set_p_ratio)
+{
+    double p_ratio;
+    PuzzleContext *context = &LIBPUZZLE_G(global_context);
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
+                              "d", &p_ratio) == FAILURE) {
+        RETURN_FALSE;
+    }
+    if (puzzle_set_p_ratio(context, p_ratio) == 0) {
+        RETVAL_TRUE;
+    } else {
+        RETVAL_FALSE;
+    }
+}
+/* }}} */
+
+/* {{{ proto bool puzzle_set_contrast_barrier_for_cropping(double barrier)
+ * Sets the tolerance level for cropping. TRUE on success, FALSE on a bad
+ * argument or when the library rejects the value */
+PHP_FUNCTION(puzzle_set_contrast_barrier_for_cropping)
+{
+    double barrier;
+    PuzzleContext *context = &LIBPUZZLE_G(global_context);
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
+                              "d", &barrier) == FAILURE) {
+        RETURN_FALSE;
+    }
+    if (puzzle_set_contrast_barrier_for_cropping(context, barrier) == 0) {
+        RETVAL_TRUE;
+    } else {
+        RETVAL_FALSE;
+    }
+}
+/* }}} */
+
+/* {{{ proto bool puzzle_set_max_cropping_ratio(double ratio)
+ * Sets the maximum ratio between the cropped area and the whole picture.
+ * TRUE on success, FALSE on a bad argument or when the library rejects it */
+PHP_FUNCTION(puzzle_set_max_cropping_ratio)
+{
+    double ratio;
+    PuzzleContext *context = &LIBPUZZLE_G(global_context);
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
+                              "d", &ratio) == FAILURE) {
+        RETURN_FALSE;
+    }
+    if (puzzle_set_max_cropping_ratio(context, ratio) == 0) {
+        RETVAL_TRUE;
+    } else {
+        RETVAL_FALSE;
+    }
+}
+/* }}} */
+
+/* {{{ proto bool puzzle_set_autocrop(bool autocrop)
+ * Pass TRUE to enable autocropping, FALSE to disable it. Returns TRUE on
+ * success, FALSE on a bad argument or when the library rejects the value */
+PHP_FUNCTION(puzzle_set_autocrop)
+{
+    zend_bool autocrop;
+    PuzzleContext *context = &LIBPUZZLE_G(global_context);
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
+                              "b", &autocrop) == FAILURE) {
+        RETURN_FALSE;
+    }
+    if (puzzle_set_autocrop(context, (int) autocrop) == 0) {
+        RETVAL_TRUE;
+    } else {
+        RETVAL_FALSE;
+    }
+}
+/* }}} */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: noet sw=4 ts=4 fdm=marker
+ * vim<600: noet sw=4 ts=4
+ */
diff --git a/deduper/libpuzzle/php/libpuzzle/libpuzzle.php b/deduper/libpuzzle/php/libpuzzle/libpuzzle.php
new file mode 100644
index 0000000..415273b
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/libpuzzle.php
@@ -0,0 +1,21 @@
+<?php
+// Smoke-test script: loads the libpuzzle extension if needed, lists the
+// functions it exports and reports whether the module is available.
+$br = (php_sapi_name() == "cli") ? "" : "<br>";
+$module = 'libpuzzle';
+
+if (!extension_loaded($module)) {
+    dl($module . '.' . PHP_SHLIB_SUFFIX);
+}
+$functions = get_extension_funcs($module);
+echo "Functions available in the test extension:$br\n";
+// get_extension_funcs() returns FALSE when the module is not loaded.
+if (is_array($functions)) {
+    foreach ($functions as $func) {
+        echo $func . "$br\n";
+    }
+}
+echo "$br\n";
+// The extension does not register a confirm_libpuzzle_compiled() function,
+// so only call it when it really exists instead of dying on a fatal error.
+$function = 'confirm_' . $module . '_compiled';
+if (!extension_loaded($module)) {
+    $str = "Module $module is not compiled into PHP";
+} elseif (function_exists($function)) {
+    $str = $function($module);
+} else {
+    $str = "Module $module is loaded";
+}
+echo "$str\n";
+?>
diff --git a/deduper/libpuzzle/php/libpuzzle/modules/Makefile.am b/deduper/libpuzzle/php/libpuzzle/modules/Makefile.am
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/modules/Makefile.am
diff --git a/deduper/libpuzzle/php/libpuzzle/php_libpuzzle.h b/deduper/libpuzzle/php/libpuzzle/php_libpuzzle.h
new file mode 100644
index 0000000..1fae819
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/php_libpuzzle.h
@@ -0,0 +1,66 @@
+#ifndef PHP_LIBPUZZLE_H
+#define PHP_LIBPUZZLE_H
+
+extern zend_module_entry libpuzzle_module_entry;
+#define phpext_libpuzzle_ptr &libpuzzle_module_entry
+
+#ifdef PHP_WIN32
+#define PHP_LIBPUZZLE_API __declspec(dllexport)
+#else
+#define PHP_LIBPUZZLE_API
+#endif
+
+#ifdef ZTS
+#include "TSRM.h"
+#endif
+
+PHP_MINIT_FUNCTION(libpuzzle);
+PHP_MSHUTDOWN_FUNCTION(libpuzzle);
+PHP_RINIT_FUNCTION(libpuzzle);
+PHP_RSHUTDOWN_FUNCTION(libpuzzle);
+PHP_MINFO_FUNCTION(libpuzzle);
+
+PHP_FUNCTION(puzzle_set_max_width);
+PHP_FUNCTION(puzzle_set_max_height);
+PHP_FUNCTION(puzzle_set_lambdas);
+PHP_FUNCTION(puzzle_set_noise_cutoff);
+PHP_FUNCTION(puzzle_set_p_ratio);
+PHP_FUNCTION(puzzle_set_contrast_barrier_for_cropping);
+PHP_FUNCTION(puzzle_set_max_cropping_ratio);
+PHP_FUNCTION(puzzle_set_autocrop);
+
+PHP_FUNCTION(puzzle_fill_cvec_from_file);
+PHP_FUNCTION(puzzle_compress_cvec);
+PHP_FUNCTION(puzzle_uncompress_cvec);
+PHP_FUNCTION(puzzle_vector_normalized_distance);
+
+/* Module globals: the single puzzle context accessed through
+ * LIBPUZZLE_G(global_context).
+ * NOTE(review): PuzzleContext is not declared by this header; the including
+ * file is expected to include <puzzle.h> first - confirm for new includers. */
+ZEND_BEGIN_MODULE_GLOBALS(libpuzzle)
+ PuzzleContext global_context;
+ZEND_END_MODULE_GLOBALS(libpuzzle)
+
+/* In every utility function you add that needs to use variables
+ in php_libpuzzle_globals, call TSRMLS_FETCH(); after declaring other
+ variables used by that function, or better yet, pass in TSRMLS_CC
+ after the last function argument and declare your utility function
+ with TSRMLS_DC after the last declared argument. Always refer to
+ the globals in your function as LIBPUZZLE_G(variable). You are
+ encouraged to rename these macros something shorter, see
+ examples in any other php module directory.
+*/
+
+#ifdef ZTS
+#define LIBPUZZLE_G(v) TSRMG(libpuzzle_globals_id, zend_libpuzzle_globals *, v)
+#else
+#define LIBPUZZLE_G(v) (libpuzzle_globals.v)
+#endif
+
+#endif /* PHP_LIBPUZZLE_H */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: noet sw=4 ts=4 fdm=marker
+ * vim<600: noet sw=4 ts=4
+ */
diff --git a/deduper/libpuzzle/php/libpuzzle/tests/001.phpt b/deduper/libpuzzle/php/libpuzzle/tests/001.phpt
new file mode 100644
index 0000000..5a5f5b5
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/tests/001.phpt
@@ -0,0 +1,10 @@
+--TEST--
+Check for libpuzzle presence
+--SKIPIF--
+<?php if (!extension_loaded("libpuzzle")) print "skip"; ?>
+--FILE--
+<?php
+echo "libpuzzle extension is available";
+?>
+--EXPECT--
+libpuzzle extension is available
diff --git a/deduper/libpuzzle/php/libpuzzle/tests/002.phpt b/deduper/libpuzzle/php/libpuzzle/tests/002.phpt
new file mode 100644
index 0000000..d675145
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/tests/002.phpt
@@ -0,0 +1,15 @@
+--TEST--
+Check for distance between similar images
+--SKIPIF--
+<?php if (!extension_loaded("libpuzzle")) print "skip"; ?>
+--FILE--
+<?php
+
+$cvec1 = puzzle_fill_cvec_from_file(dirname(__FILE__) . '/pics/pic-a-0.jpg');
+$cvec2 = puzzle_fill_cvec_from_file(dirname(__FILE__) . '/pics/pic-a-1.jpg');
+$d = puzzle_vector_normalized_distance($cvec1, $cvec2);
+// exit() with an integer only sets the exit status and prints nothing, so
+// the result is echoed in order to be compared with the EXPECT section.
+echo (int) ($d < PUZZLE_CVEC_SIMILARITY_LOWER_THRESHOLD);
+
+?>
+--EXPECT--
+1
diff --git a/deduper/libpuzzle/php/libpuzzle/tests/003.phpt b/deduper/libpuzzle/php/libpuzzle/tests/003.phpt
new file mode 100644
index 0000000..ba7d5aa
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/tests/003.phpt
@@ -0,0 +1,24 @@
+--TEST--
+Check the puzzle_set(3) interface
+--SKIPIF--
+<?php if (!extension_loaded("libpuzzle")) print "skip"; ?>
+--FILE--
+<?php
+
+$cvec1 = puzzle_fill_cvec_from_file(dirname(__FILE__) . '/pics/pic-a-0.jpg');
+$cvec2 = puzzle_fill_cvec_from_file(dirname(__FILE__) . '/pics/pic-a-1.jpg');
+puzzle_set_max_width(1500);
+puzzle_set_max_height(1500);
+puzzle_set_lambdas(11);
+puzzle_set_noise_cutoff(1.0);
+puzzle_set_p_ratio(2.0);
+puzzle_set_contrast_barrier_for_cropping(0.1);
+puzzle_set_max_cropping_ratio(0.1);
+puzzle_set_autocrop(FALSE);
+
+$d = puzzle_vector_normalized_distance($cvec1, $cvec2);
+// exit() with an integer only sets the exit status and prints nothing, so
+// the result is echoed in order to be compared with the EXPECT section.
+echo (int) ($d < PUZZLE_CVEC_SIMILARITY_LOWER_THRESHOLD);
+
+?>
+--EXPECT--
+1
diff --git a/deduper/libpuzzle/php/libpuzzle/tests/Makefile.am b/deduper/libpuzzle/php/libpuzzle/tests/Makefile.am
new file mode 100644
index 0000000..14ded39
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/tests/Makefile.am
@@ -0,0 +1,7 @@
+EXTRA_DIST = \
+ 001.phpt \
+ 002.phpt \
+ 003.phpt
+
+SUBDIRS = \
+ pics
diff --git a/deduper/libpuzzle/php/libpuzzle/tests/pics/Makefile.am b/deduper/libpuzzle/php/libpuzzle/tests/pics/Makefile.am
new file mode 100644
index 0000000..0aacd9a
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/tests/pics/Makefile.am
@@ -0,0 +1,3 @@
+EXTRA_DIST = \
+ pic-a-0.jpg \
+ pic-a-1.jpg
diff --git a/deduper/libpuzzle/php/libpuzzle/tests/pics/pic-a-0.jpg b/deduper/libpuzzle/php/libpuzzle/tests/pics/pic-a-0.jpg
new file mode 100644
index 0000000..3dd4a3b
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/tests/pics/pic-a-0.jpg
Binary files differ
diff --git a/deduper/libpuzzle/php/libpuzzle/tests/pics/pic-a-1.jpg b/deduper/libpuzzle/php/libpuzzle/tests/pics/pic-a-1.jpg
new file mode 100644
index 0000000..95f0e77
--- /dev/null
+++ b/deduper/libpuzzle/php/libpuzzle/tests/pics/pic-a-1.jpg
Binary files differ
diff --git a/deduper/libpuzzle/src/CMakeLists.txt b/deduper/libpuzzle/src/CMakeLists.txt
new file mode 100644
index 0000000..634ef38
--- /dev/null
+++ b/deduper/libpuzzle/src/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Builds libpuzzle as a static C library.
+project(puzzle C)
+
+# The gd library is located through pkg-config and is mandatory.
+include(FindPkgConfig)
+pkg_search_module(gdlib REQUIRED gdlib)
+
+add_library(puzzle STATIC
+ globals.h
+ puzzle_common.h
+ puzzle_p.h
+ puzzle.h
+ compress.c
+ cvec.c
+ dvec.c
+ puzzle.c
+ tunables.c
+ vector_ops.c
+)
+# Only the gd include directories are attached to the target here.
+# NOTE(review): the gd libraries are not linked in this file; presumably the
+# consuming target links them - confirm in the parent CMakeLists.txt.
+target_include_directories(puzzle
+ PRIVATE
+ ${gdlib_INCLUDE_DIRS}
+)
diff --git a/deduper/libpuzzle/src/Makefile.am b/deduper/libpuzzle/src/Makefile.am
new file mode 100644
index 0000000..3016925
--- /dev/null
+++ b/deduper/libpuzzle/src/Makefile.am
@@ -0,0 +1,72 @@
+lib_LTLIBRARIES = \
+ libpuzzle.la
+
+libpuzzle_la_LDFLAGS = -version-info 1:0
+
+libpuzzle_la_SOURCES = \
+ puzzle.c \
+ tunables.c \
+ dvec.c \
+ cvec.c \
+ compress.c \
+ vector_ops.c \
+ puzzle_common.h \
+ puzzle_p.h \
+ globals.h \
+ puzzle.h
+
+include_HEADERS = \
+ puzzle.h
+
+noinst_HEADERS = \
+ puzzle_common.h \
+ puzzle_p.h \
+ globals.h
+
+bin_PROGRAMS = \
+ puzzle-diff
+
+puzzle_diff_SOURCES = \
+ puzzle-diff.c \
+ puzzle_common.h \
+ puzzle.h
+
+puzzle_diff_LDADD = \
+ libpuzzle.la
+
+TESTS = \
+ regress_1 \
+ regress_2 \
+ regress_3
+
+check_PROGRAMS = \
+ regress_1 \
+ regress_2 \
+ regress_3
+
+regress_1_SOURCES = \
+ regress_1.c \
+ puzzle_common.h \
+ puzzle.h
+
+regress_2_SOURCES = \
+ regress_2.c \
+ puzzle_common.h \
+ puzzle.h
+
+regress_3_SOURCES = \
+ regress_3.c \
+ puzzle_common.h \
+ puzzle.h
+
+regress_1_LDADD = \
+ libpuzzle.la
+
+regress_2_LDADD = \
+ libpuzzle.la
+
+regress_3_LDADD = \
+ libpuzzle.la
+
+SUBDIRS = \
+ pics
diff --git a/deduper/libpuzzle/src/compress.c b/deduper/libpuzzle/src/compress.c
new file mode 100644
index 0000000..e71da95
--- /dev/null
+++ b/deduper/libpuzzle/src/compress.c
@@ -0,0 +1,125 @@
+#include "puzzle_common.h"
+#include "puzzle_p.h"
+#include "puzzle.h"
+#include "globals.h"
+
+/* Puts a compressed vector in its empty state: no buffer, size of zero.
+ * The context is not used. */
+void puzzle_init_compressed_cvec(PuzzleContext * const context,
+                                 PuzzleCompressedCvec * const compressed_cvec)
+{
+    compressed_cvec->vec = NULL;
+    compressed_cvec->sizeof_compressed_vec = (size_t) 0U;
+    (void) context;
+}
+
+/* Releases the buffer owned by a compressed vector and clears the pointer.
+ * The stored size is left as it is. The context is not used. */
+void puzzle_free_compressed_cvec(PuzzleContext * const context,
+                                 PuzzleCompressedCvec * const compressed_cvec)
+{
+    (void) context;
+    if (compressed_cvec->vec != NULL) {
+        free(compressed_cvec->vec);
+        compressed_cvec->vec = NULL;
+    }
+}
+
+/*
+ * Packs a signature into roughly a third of its size.
+ * Every input value is shifted by +2 (PC_NM) and used as a base-5 digit;
+ * three digits are stored per output byte. One or two leftover values go
+ * into a final byte, and their count is flagged in the top bit of the first
+ * (one leftover) or second (two leftovers) output byte.
+ * Returns 0 on success, -1 if the output buffer cannot be allocated.
+ * NOTE(review): compressed_cvec->vec is overwritten without being freed;
+ * callers presumably pass a freshly initialized structure - confirm.
+ */
+int puzzle_compress_cvec(PuzzleContext * const context,
+ PuzzleCompressedCvec * const compressed_cvec,
+ const PuzzleCvec * const cvec)
+{
+#define PC_NM(X) ((unsigned char) ((X) + 2))
+ size_t remaining = cvec->sizeof_vec;
+ const signed char *ptr;
+ unsigned char *cptr;
+
+ (void) context;
+ /* ceil(sizeof_vec / 3) output bytes */
+ compressed_cvec->sizeof_compressed_vec =
+ (cvec->sizeof_vec + (size_t) 2U) / (size_t) 3U;
+ if ((compressed_cvec->vec =
+ calloc(compressed_cvec->sizeof_compressed_vec,
+ sizeof *compressed_cvec->vec)) == NULL) {
+ return -1;
+ }
+ ptr = cvec->vec;
+ cptr = compressed_cvec->vec;
+ /* Full groups of three values: d0 + 5 * d1 + 25 * d2 */
+ while (remaining >= (size_t) 3U) {
+ *cptr++ = PC_NM(ptr[0]) + PC_NM(ptr[1]) * 5U +
+ PC_NM(ptr[2]) * (5U * 5U);
+ ptr += 3U;
+ remaining -= 3U;
+ }
+ if (remaining == (size_t) 1U) {
+ /* One leftover value: flag it in the top bit of byte 0 */
+ *cptr++ = PC_NM(ptr[0]);
+ compressed_cvec->vec[0] |= 128U;
+ } else if (remaining == (size_t) 2U) {
+ /* Two leftover values: flag them in the top bit of byte 1 */
+ *cptr++ = PC_NM(ptr[0]) + PC_NM(ptr[1]) * 5U;
+ if (compressed_cvec->sizeof_compressed_vec < (size_t) 2U) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ compressed_cvec->vec[1] |= 128U;
+ }
+ /* Sanity check: exactly the announced number of bytes was written */
+ if ((size_t) (cptr - compressed_cvec->vec) !=
+ compressed_cvec->sizeof_compressed_vec) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ return 0;
+}
+
+/*
+ * Expands a compressed signature back into one value per element.
+ * The top bits of the first two bytes give the number of values (0, 1 or 2)
+ * stored in the last byte; every other byte holds three base-5 digits, each
+ * shifted back by -2 (PC_NP).
+ * cvec must be freshly initialized (cvec->vec == NULL).
+ * Returns 0 on success, -1 if the output buffer cannot be allocated.
+ */
+int puzzle_uncompress_cvec(PuzzleContext * const context,
+                           const PuzzleCompressedCvec * const compressed_cvec,
+                           PuzzleCvec * const cvec)
+{
+#define PC_FL(X) ((X) & 127U)
+#define PC_NP(X) ((signed char) (X) - 2)
+
+    size_t remaining;
+    unsigned char trailing_bits;
+    const unsigned char *cptr = compressed_cvec->vec;
+    signed char *ptr;
+    unsigned char c;
+
+    (void) context;
+    if (cvec->vec != NULL) {
+        puzzle_err_bug(__FILE__, __LINE__);
+    }
+    if ((remaining = compressed_cvec->sizeof_compressed_vec) < (size_t) 2U) {
+        puzzle_err_bug(__FILE__, __LINE__);
+    }
+    trailing_bits = ((cptr[0] & 128U) >> 7) | ((cptr[1] & 128U) >> 6);
+    if (trailing_bits > 2U) {
+        puzzle_err_bug(__FILE__, __LINE__);
+    }
+    /* Validate the size before multiplying it, not after. */
+    if (compressed_cvec->sizeof_compressed_vec >
+        SIZE_MAX / (size_t) 3U - (size_t) 2U) {
+        puzzle_err_bug(__FILE__, __LINE__);
+    }
+    /* Whether it holds one or two values, the trailing data always occupies
+     * exactly one byte, so a single byte is excluded from the full groups.
+     * Subtracting trailing_bits itself under-sized the buffer when two
+     * values were trailing. */
+    cvec->sizeof_vec = (size_t) 3U *
+        (compressed_cvec->sizeof_compressed_vec -
+         (trailing_bits != 0U ? (size_t) 1U : (size_t) 0U)) +
+        trailing_bits;
+    if ((cvec->vec = calloc(cvec->sizeof_vec, sizeof *cvec->vec)) == NULL) {
+        return -1;
+    }
+    if (trailing_bits != 0U) {
+        if (remaining <= (size_t) 0U) {
+            puzzle_err_bug(__FILE__, __LINE__);
+        }
+        remaining--;
+    }
+    ptr = cvec->vec;
+    /* Full bytes: three base-5 digits each, flag bit masked out. */
+    while (remaining > (size_t) 0U) {
+        c = PC_FL(*cptr++);
+        *ptr++ = PC_NP(c % 5U);
+        c /= 5U;
+        *ptr++ = PC_NP(c % 5U);
+        c /= 5U;
+        *ptr++ = PC_NP(c % 5U);
+        remaining--;
+    }
+    if (trailing_bits == 1U) {
+        *ptr++ = PC_NP(PC_FL(*cptr) % 5U);
+    } else if (trailing_bits == 2U) {
+        c = PC_FL(*cptr);
+        *ptr++ = PC_NP(c % 5U);
+        *ptr++ = PC_NP(c / 5U % 5U);
+    }
+    /* Sanity check: exactly the announced number of values was written. */
+    if ((size_t) (ptr - cvec->vec) != cvec->sizeof_vec) {
+        puzzle_err_bug(__FILE__, __LINE__);
+    }
+    return 0;
+}
diff --git a/deduper/libpuzzle/src/cvec.c b/deduper/libpuzzle/src/cvec.c
new file mode 100644
index 0000000..482b445
--- /dev/null
+++ b/deduper/libpuzzle/src/cvec.c
@@ -0,0 +1,202 @@
+#include "puzzle_common.h"
+#include "puzzle_p.h"
+#include "puzzle.h"
+#include "globals.h"
+
+/* qsort() comparator ordering doubles in ascending order:
+ * -1 if *a_ < *b_, 1 if *a_ > *b_, 0 otherwise. */
+static int puzzle_median_cmp(const void * const a_, const void * const b_)
+{
+    const double lhs = * (const double *) a_;
+    const double rhs = * (const double *) b_;
+
+    return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Sorts vec in place and returns the cutoff value used as its "median":
+ * the mean of vec[size / 2] and the element that follows it, falling back to
+ * vec[size / 2] if the mean is not between the two.
+ * Returns 0.0 for an empty vector.
+ * The second index is clamped to the last element, so a two-element vector
+ * no longer reads vec[2]. Results for every other size are unchanged.
+ */
+static double puzzle_median(double * const vec, size_t size)
+{
+    size_t n;
+    size_t o;
+    double avg;
+
+    if (size <= (size_t) 0U) {
+        return 0.0;
+    }
+    qsort((void *) vec, size, sizeof *vec, puzzle_median_cmp);
+    n = size / (size_t) 2U;
+    o = n + (size_t) 1U;
+    if (o >= size) {
+        /* Only reached for size 1 and size 2: stay inside the vector. */
+        o = size - (size_t) 1U;
+    }
+    if (o < n) {
+        puzzle_err_bug(__FILE__, __LINE__);
+    }
+    avg = (vec[n] + vec[o]) / 2.0;
+    if (avg < vec[n] || avg > vec[o]) {
+        avg = vec[n];
+    }
+    return avg;
+}
+
+/*
+ * Quantizes a vector of doubles into a signature of values in -2..+2.
+ * Pass 1 collects the values outside the noise band into "darks" and
+ * "lights" and computes one cutoff for each with puzzle_median().
+ * Pass 2 maps every value to 0 (inside the noise band), -1 / -2 (negative,
+ * -2 when below the dark cutoff) or +1 / +2 (positive, +2 when above the
+ * light cutoff).
+ * Returns 0 on success, -1 on allocation failure. When the scratch buffers
+ * cannot be allocated, cvec->vec stays allocated and is left to the caller.
+ */
+int puzzle_fill_cvec_from_dvec(PuzzleContext * const context,
+ PuzzleCvec * const cvec,
+ const PuzzleDvec * const dvec)
+{
+ size_t s;
+ const double *dvecptr;
+ signed char *cvecptr;
+ double *lights = NULL, *darks = NULL;
+ size_t pos_lights = (size_t) 0U, pos_darks = (size_t) 0U;
+ size_t sizeof_lights, sizeof_darks;
+ double lighter_cutoff, darker_cutoff;
+ int err = 0;
+ double dv;
+
+ /* The signature has one entry per element of the source vector */
+ if ((cvec->sizeof_vec = dvec->sizeof_compressed_vec) <= (size_t) 0U) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ if ((cvec->vec = calloc(cvec->sizeof_vec, sizeof *cvec->vec)) == NULL) {
+ return -1;
+ }
+ /* Worst case: every value lands in the same bucket */
+ sizeof_lights = sizeof_darks = cvec->sizeof_vec;
+ if ((lights = calloc(sizeof_lights, sizeof *lights)) == NULL ||
+ (darks = calloc(sizeof_darks, sizeof *darks)) == NULL) {
+ err = -1;
+ goto out;
+ }
+ dvecptr = dvec->vec;
+ s = cvec->sizeof_vec;
+ /* Pass 1: split the significant values into darks and lights */
+ do {
+ dv = *dvecptr++;
+ if (dv >= - context->puzzle_noise_cutoff &&
+ dv <= context->puzzle_noise_cutoff) {
+ continue;
+ }
+ /* Values inside the noise band were skipped above, so (assuming a
+ * non-negative cutoff) this branch only sees values below -cutoff */
+ if (dv < context->puzzle_noise_cutoff) {
+ darks[pos_darks++] = dv;
+ if (pos_darks > sizeof_darks) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ } else if (dv > context->puzzle_noise_cutoff) {
+ lights[pos_lights++] = dv;
+ if (pos_lights > sizeof_lights) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ }
+ } while (--s != (size_t) 0U);
+ lighter_cutoff = puzzle_median(lights, pos_lights);
+ darker_cutoff = puzzle_median(darks, pos_darks);
+ /* Reset to NULL so that the free() calls at "out" are harmless */
+ free(lights);
+ lights = NULL;
+ free(darks);
+ darks = NULL;
+ dvecptr = dvec->vec;
+ cvecptr = cvec->vec;
+ s = cvec->sizeof_vec;
+ /* Pass 2: quantize every value to one of the five levels */
+ do {
+ dv = *dvecptr++;
+ if (dv >= - context->puzzle_noise_cutoff &&
+ dv <= context->puzzle_noise_cutoff) {
+ *cvecptr++ = 0;
+ } else if (dv < 0.0) {
+ *cvecptr++ = dv < darker_cutoff ? -2 : -1;
+ } else {
+ *cvecptr++ = dv > lighter_cutoff ? +2 : +1;
+ }
+ } while (--s != (size_t) 0U);
+ /* Sanity check: every entry of the signature was written */
+ if ((size_t) (cvecptr - cvec->vec) != cvec->sizeof_vec) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ out:
+ free(lights);
+ free(darks);
+
+ return err;
+}
+
+/* Puts a signature in its empty state: no buffer, size of zero.
+ * The context is not used. */
+void puzzle_init_cvec(PuzzleContext * const context, PuzzleCvec * const cvec)
+{
+    cvec->vec = NULL;
+    cvec->sizeof_vec = (size_t) 0U;
+    (void) context;
+}
+
+/* Releases the buffer owned by a signature and clears the pointer.
+ * The stored size is left as it is. The context is not used. */
+void puzzle_free_cvec(PuzzleContext * const context, PuzzleCvec * const cvec)
+{
+    (void) context;
+    if (cvec->vec != NULL) {
+        free(cvec->vec);
+        cvec->vec = NULL;
+    }
+}
+
+/*
+ * Prints every value of a signature to stdout, one decimal number per line.
+ * An empty signature is reported through puzzle_err_bug().
+ * Always returns 0. The context is not used.
+ */
+int puzzle_dump_cvec(PuzzleContext * const context,
+ const PuzzleCvec * const cvec)
+{
+ size_t s = cvec->sizeof_vec;
+ const signed char *vecptr = cvec->vec;
+
+ (void) context;
+ if (s <= (size_t) 0U) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ do {
+ printf("%d\n", *vecptr++);
+ } while (--s != (size_t) 0U);
+
+ return 0;
+}
+
+/*
+ * Computes a checksum of a signature into *sum: starting from 5381, each
+ * value updates the sum with sum = (sum * 33) ^ value.
+ * An empty signature yields the seed 5381 instead of reading out of bounds.
+ * Always returns 0. The context is not used.
+ */
+int puzzle_cvec_cksum(PuzzleContext * const context,
+                      const PuzzleCvec * const cvec, unsigned int * const sum)
+{
+    size_t s = cvec->sizeof_vec;
+    const signed char *vecptr = cvec->vec;
+
+    (void) context;
+    *sum = 5381;
+    /* Pre-tested loop: a do/while would read one element of an empty vector
+     * and then wrap the counter around. */
+    while (s != (size_t) 0U) {
+        *sum += *sum << 5;
+        *sum ^= (unsigned int) *vecptr++;
+        s--;
+    }
+
+    return 0;
+}
+
+/*
+ * Builds the signature of the image stored in a file: the file is first
+ * turned into a temporary PuzzleDvec, which is then converted into cvec.
+ * Returns 0 on success, or the non-zero status of the step that failed.
+ */
+int puzzle_fill_cvec_from_file(PuzzleContext * const context,
+                               PuzzleCvec * const cvec,
+                               const char * const file)
+{
+    PuzzleDvec scratch;
+    int status;
+
+    puzzle_init_dvec(context, &scratch);
+    status = puzzle_fill_dvec_from_file(context, &scratch, file);
+    if (status == 0) {
+        status = puzzle_fill_cvec_from_dvec(context, cvec, &scratch);
+    }
+    /* The temporary vector is released on every path. */
+    puzzle_free_dvec(context, &scratch);
+
+    return status;
+}
+
+/*
+ * Builds the signature of an image held in memory (size bytes at mem): the
+ * data is first turned into a temporary PuzzleDvec, which is then converted
+ * into cvec.
+ * Returns 0 on success, or the non-zero status of the step that failed.
+ */
+int puzzle_fill_cvec_from_mem(PuzzleContext * const context,
+                              PuzzleCvec * const cvec,
+                              const void * const mem,
+                              const size_t size)
+{
+    PuzzleDvec scratch;
+    int status;
+
+    puzzle_init_dvec(context, &scratch);
+    status = puzzle_fill_dvec_from_mem(context, &scratch, mem, size);
+    if (status == 0) {
+        status = puzzle_fill_cvec_from_dvec(context, cvec, &scratch);
+    }
+    /* The temporary vector is released on every path. */
+    puzzle_free_dvec(context, &scratch);
+
+    return status;
+}
diff --git a/deduper/libpuzzle/src/dvec.c b/deduper/libpuzzle/src/dvec.c
new file mode 100644
index 0000000..f5d21f9
--- /dev/null
+++ b/deduper/libpuzzle/src/dvec.c
@@ -0,0 +1,663 @@
+#include "puzzle_common.h"
+#include "puzzle_p.h"
+#include "puzzle.h"
+#include "globals.h"
+
+/* Put view into its empty state: zero dimensions, zero map size, no map. */
+static void puzzle_init_view(PuzzleView * const view)
+{
+    view->map = NULL;
+    view->sizeof_map = 0;
+    view->height = 0U;
+    view->width = 0U;
+}
+
+/*
+ * Release the pixel map held by view and clear the pointer.
+ * The dimensions and sizeof_map are left as they were.
+ */
+static void puzzle_free_view(PuzzleView * const view)
+{
+    unsigned char * const owned = view->map;
+
+    view->map = NULL;
+    free(owned);
+}
+
+/* Put avglvls into its empty state: no grid side, zero size, no buffer. */
+static void puzzle_init_avglvls(PuzzleAvgLvls * const avglvls)
+{
+    avglvls->lvls = NULL;
+    avglvls->sizeof_lvls = 0;
+    avglvls->lambdas = 0U;
+}
+
+/*
+ * Release the level buffer held by avglvls and clear the pointer.
+ * lambdas and sizeof_lvls are left as they were.
+ */
+static void puzzle_free_avglvls(PuzzleAvgLvls * const avglvls)
+{
+    double * const owned = avglvls->lvls;
+
+    avglvls->lvls = NULL;
+    free(owned);
+}
+
+/*
+ * Put dvec into its empty state: no buffer and both size fields at zero.
+ * context is not used by this function.
+ */
+void puzzle_init_dvec(PuzzleContext * const context, PuzzleDvec * const dvec)
+{
+    dvec->vec = NULL;
+    dvec->sizeof_compressed_vec = 0;
+    dvec->sizeof_vec = 0;
+    (void) context;
+}
+
+/*
+ * Release the buffer held by dvec and clear the pointer, so calling this
+ * again on the same object is harmless. The size fields are left untouched.
+ * context is not used by this function.
+ */
+void puzzle_free_dvec(PuzzleContext * const context, PuzzleDvec * const dvec)
+{
+    double * const owned = dvec->vec;
+
+    (void) context;
+    dvec->vec = NULL;
+    free(owned);
+}
+
+#define MAX_SIGNATURE_LENGTH 8U
+
+/*
+ * Identify an image format from the leading bytes of its data.
+ *
+ * header is compared against each known signature in table order (GIF,
+ * JPEG, PNG); the longest signature is 8 bytes, so the caller must make at
+ * least MAX_SIGNATURE_LENGTH bytes readable at header.
+ * Returns the matching type code, or PUZZLE_IMAGE_TYPE_UNKNOWN if none match.
+ */
+static PuzzleImageTypeCode puzzle_get_image_type_from_header(const unsigned char * const header)
+{
+    static const PuzzleImageType known_types[] = {
+        { (size_t) 4U, (const unsigned char *)
+          "GIF8", PUZZLE_IMAGE_TYPE_GIF },
+        { (size_t) 3U, (const unsigned char *)
+          "\xff\xd8\xff", PUZZLE_IMAGE_TYPE_JPEG },
+        { (size_t) 8U, (const unsigned char *)
+          "\x89PNG\r\n\x1a\n", PUZZLE_IMAGE_TYPE_PNG },
+        /* Sentinel: a NULL signature terminates the table. */
+        { (size_t) 0U, NULL, PUZZLE_IMAGE_TYPE_UNKNOWN }
+    };
+    const PuzzleImageType *candidate;
+
+    for (candidate = known_types; candidate->signature != NULL; candidate++) {
+        /* A table entry longer than the probe buffer is a programming error. */
+        if (candidate->sizeof_signature > MAX_SIGNATURE_LENGTH) {
+            puzzle_err_bug(__FILE__, __LINE__);
+        }
+        if (memcmp(header, candidate->signature,
+                   candidate->sizeof_signature) == 0) {
+            return candidate->image_type_code;
+        }
+    }
+    return PUZZLE_IMAGE_TYPE_UNKNOWN;
+}
+
+/*
+ * Identify the image format of an open stream from its first
+ * MAX_SIGNATURE_LENGTH bytes.
+ *
+ * The stream is rewound for the probe and its previous position is restored
+ * afterwards; failing to restore it is reported through puzzle_err_bug().
+ * Returns PUZZLE_IMAGE_TYPE_ERROR if the position cannot be saved or fewer
+ * than MAX_SIGNATURE_LENGTH bytes can be read; otherwise the result of
+ * puzzle_get_image_type_from_header().
+ */
+static PuzzleImageTypeCode puzzle_get_image_type_from_fp(FILE * const fp)
+{
+    unsigned char magic[MAX_SIGNATURE_LENGTH];
+    PuzzleImageTypeCode detected = PUZZLE_IMAGE_TYPE_ERROR;
+    fpos_t saved;
+
+    if (fgetpos(fp, &saved) != 0) {
+        return PUZZLE_IMAGE_TYPE_ERROR;
+    }
+    rewind(fp);
+    if (fread(magic, (size_t) 1U, sizeof magic, fp) == sizeof magic) {
+        detected = puzzle_get_image_type_from_header(magic);
+    }
+    if (fsetpos(fp, &saved) != 0) {
+        puzzle_err_bug(__FILE__, __LINE__);
+    }
+    return detected;
+}
+
+static int puzzle_autocrop_axis(PuzzleContext * const context,
+ PuzzleView * const view,
+ unsigned int * const crop0,
+ unsigned int * const crop1,
+ const unsigned int axisn,
+ const unsigned int axiso,
+ const int omaptrinc, const int nmaptrinc)
+{ /* Find crop bounds along one axis of view->map from per-chunk contrast.
+   * The map is walked as axisn chunks of axiso samples each: maptr advances
+   * by omaptrinc after every sample and by a further nmaptrinc after every
+   * chunk. On return *crop0 and *crop1 hold the first and last chunk index
+   * to keep.
+   * Returns 0 on success, 1 (bounds left at 0 and axisn - 1) when either
+   * axis is shorter than PUZZLE_MIN_SIZE_FOR_CROPPING, -1 if calloc() fails.
+   */
+ double *chunk_contrasts;
+ size_t sizeof_chunk_contrasts;
+ double chunk_contrast = 0.0, total_contrast = 0.0, barrier_contrast;
+ unsigned char level = 0U;
+ unsigned char previous_level = 0U;
+ unsigned int chunk_n, chunk_o;
+ unsigned int chunk_n1, chunk_o1;
+ unsigned int max_crop;
+ const unsigned char *maptr;
+
+ chunk_n1 = axisn - 1U; /* highest chunk index */
+ chunk_o1 = axiso - 1U; /* highest sample index inside a chunk */
+ *crop0 = 0U;
+ *crop1 = chunk_n1; /* default bounds: keep everything */
+ if (axisn < (unsigned int) PUZZLE_MIN_SIZE_FOR_CROPPING ||
+ axiso < (unsigned int) PUZZLE_MIN_SIZE_FOR_CROPPING) {
+ return 1; /* too small to crop; the caller only treats values < 0 as errors */
+ }
+ sizeof_chunk_contrasts = chunk_n1 + 1U;
+ if ((chunk_contrasts = calloc(sizeof_chunk_contrasts,
+ sizeof *chunk_contrasts)) == NULL) {
+ return -1;
+ }
+ maptr = view->map;
+ if (axisn >= INT_MAX || axiso >= INT_MAX) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ if (INT_MAX / axisn < axiso) { /* axisn * axiso must fit in an int */
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ chunk_n = chunk_n1; /* slots are filled from the last one down to slot 0 while maptr moves forward */
+ do {
+ chunk_contrast = 0.0;
+ chunk_o = chunk_o1;
+ previous_level = *maptr;
+ do {
+ level = *maptr;
+ if (previous_level > level) { /* accumulate the absolute difference between consecutive samples */
+ chunk_contrast += (double) (previous_level - level);
+ } else {
+ chunk_contrast += (double) (level - previous_level);
+ }
+ previous_level = level;
+ maptr += omaptrinc;
+ } while (chunk_o-- != 0U);
+ chunk_contrasts[chunk_n] = chunk_contrast;
+ total_contrast += chunk_contrast;
+ maptr += nmaptrinc;
+ } while (chunk_n-- != 0U);
+ barrier_contrast =
+ total_contrast * context->puzzle_contrast_barrier_for_cropping; /* share of the total contrast a scan must accumulate before it stops */
+ total_contrast = 0.0;
+ *crop0 = 0U;
+ do { /* lowest slot at which the running sum, started at slot 0, reaches the barrier */
+ total_contrast += chunk_contrasts[*crop0];
+ if (total_contrast >= barrier_contrast) {
+ break;
+ }
+ } while ((*crop0)++ < chunk_n1);
+ total_contrast = 0.0;
+ *crop1 = chunk_n1;
+ do { /* same scan started at the last slot and moving down */
+ total_contrast += chunk_contrasts[*crop1];
+ if (total_contrast >= barrier_contrast) {
+ break;
+ }
+ } while ((*crop1)-- > 0U);
+ free(chunk_contrasts);
+ if (*crop0 > chunk_n1 || *crop1 > chunk_n1) { /* a scan ran off its end without reaching the barrier */
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ max_crop = (unsigned int)
+ round((double) chunk_n1 * context->puzzle_max_cropping_ratio); /* most chunks that may be dropped from either end */
+ if (max_crop > chunk_n1) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ *crop0 = MIN(*crop0, max_crop);
+ *crop1 = MAX(*crop1, chunk_n1 - max_crop);
+
+ return 0;
+}
+
+static int puzzle_autocrop_view(PuzzleContext * context,
+ PuzzleView * const view)
+{ /* Crop view in place to the bounds puzzle_autocrop_axis() finds on each
+   * axis. The kept rectangle is copied to the front of view->map (the
+   * buffer is not reallocated) and width, height and sizeof_map are
+   * updated to match. Returns 0 on success, -1 if an axis scan fails.
+   */
+ unsigned int cropx0, cropx1;
+ unsigned int cropy0, cropy1;
+ unsigned int x, y;
+ unsigned char *maptr;
+
+ if (puzzle_autocrop_axis(context, view, &cropx0, &cropx1,
+ view->width, view->height,
+ (int) view->width,
+ 1 - (int) (view->width * view->height)) < 0 || /* x scan: step one row per sample, then jump back to the top of the next column */
+ puzzle_autocrop_axis(context, view, &cropy0, &cropy1,
+ view->height, view->width,
+ 1, 0) < 0) { /* y scan: plain linear walk, one row per chunk */
+ return -1; /* a return of 1 ("too small to crop") is not a failure */
+ }
+ if (cropx0 > cropx1 || cropy0 > cropy1) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ maptr = view->map;
+ y = cropy0;
+ do { /* the write position never gets ahead of the read index, so copying in place is safe */
+ x = cropx0;
+ do {
+ *maptr++ = PUZZLE_VIEW_PIXEL(view, x, y); /* still indexed with the old width */
+ } while (x++ != cropx1);
+ } while (y++ != cropy1);
+ view->width = cropx1 - cropx0 + 1U; /* both bounds are inclusive */
+ view->height = cropy1 - cropy0 + 1U;
+ view->sizeof_map = (size_t) view->width * (size_t) view->height;
+ if (view->width <= 0U || view->height <= 0U ||
+ SIZE_MAX / view->width < view->height) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ return 0;
+}
+
+static int puzzle_getview_from_gdimage(PuzzleContext * const context,
+ PuzzleView * const view,
+ gdImagePtr gdimage)
+{ /* Build an 8-bit grey-level map of gdimage in view.
+   * view->map is allocated here with calloc() and is left for the caller
+   * to release. Returns 0 on success, -1 if the image is larger than
+   * context->puzzle_max_width x puzzle_max_height or if calloc() fails.
+   */
+ unsigned int x, y;
+ const unsigned int x0 = 0U, y0 = 0U;
+ unsigned int x1, y1;
+ unsigned char *maptr;
+ int pixel;
+
+ view->map = NULL;
+ view->width = (unsigned int) gdImageSX(gdimage);
+ view->height = (unsigned int) gdImageSY(gdimage);
+ view->sizeof_map = (size_t) (view->width * view->height); /* product is formed in unsigned int; an overflowing size is rejected below */
+ if (view->width > context->puzzle_max_width ||
+ view->height > context->puzzle_max_height) {
+ return -1;
+ }
+ if (view->sizeof_map <= (size_t) 0U || /* tested first, so the divisions below never see a zero width */
+ INT_MAX / view->width < view->height ||
+ SIZE_MAX / view->width < view->height ||
+ (unsigned int) view->sizeof_map != view->sizeof_map) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ x1 = view->width - 1U;
+ y1 = view->height - 1U;
+ if (view->width <= 0U || view->height <= 0U) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ if ((view->map = calloc(view->sizeof_map, sizeof *view->map)) == NULL) {
+ return -1;
+ }
+ if (x1 > INT_MAX || y1 > INT_MAX) { /* GD uses "int" for coordinates */
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ maptr = view->map;
+ x = x1; /* pixels are visited from (x1, y1) down to (0, 0), x outermost, and appended to the map in that order */
+ if (gdImageTrueColor(gdimage) != 0) {
+ do {
+ y = y1;
+ do {
+ pixel = gdImageGetTrueColorPixel(gdimage, (int) x, (int) y);
+ *maptr++ = (unsigned char)
+ ((gdTrueColorGetRed(pixel) * 77 + /* weights 77 + 151 + 28 = 256; the + 128 rounds to nearest */
+ gdTrueColorGetGreen(pixel) * 151 +
+ gdTrueColorGetBlue(pixel) * 28 + 128) / 256);
+ } while (y-- != y0);
+ } while (x-- != x0);
+ } else { /* palette image: same weighting, components read from the palette tables */
+ do {
+ y = y1;
+ do {
+ pixel = gdImagePalettePixel(gdimage, x, y);
+ *maptr++ = (unsigned char)
+ ((gdimage->red[pixel] * 77 +
+ gdimage->green[pixel] * 151 +
+ gdimage->blue[pixel] * 28 + 128) / 256);
+ } while (y-- != y0);
+ } while (x-- != x0);
+ }
+ return 0;
+}
+
+static double puzzle_softedgedlvl(const PuzzleView * const view,
+ const unsigned int x, const unsigned int y)
+{ /* Mean of the view pixels in the square window that extends
+   * PUZZLE_PIXEL_FUZZ_SIZE pixels on every side of (x, y), clipped to the
+   * view. (x, y) must lie inside the view, otherwise puzzle_err_bug().
+   * Returns 0.0 if no pixel was visited.
+   */
+ unsigned int lvl = 0U;
+ unsigned int ax, ay;
+ unsigned int count = 0U;
+ const unsigned int xlimit = x + PUZZLE_PIXEL_FUZZ_SIZE; /* inclusive upper bounds of the window */
+ const unsigned int ylimit = y + PUZZLE_PIXEL_FUZZ_SIZE;
+ if (x >= view->width || y >= view->height || xlimit <= x || ylimit <= y) { /* the last two tests catch wrap-around of the additions above */
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ if (x > PUZZLE_PIXEL_FUZZ_SIZE) {
+ ax = x - PUZZLE_PIXEL_FUZZ_SIZE;
+ } else {
+ ax = 0U; /* clip at the left edge */
+ }
+ do {
+ if (ax >= view->width) { /* clip at the right edge */
+ break;
+ }
+ if (y > PUZZLE_PIXEL_FUZZ_SIZE) {
+ ay = y - PUZZLE_PIXEL_FUZZ_SIZE;
+ } else {
+ ay = 0U;
+ }
+ do {
+ if (ay >= view->height) {
+ break;
+ }
+ count++;
+ lvl += (unsigned int) PUZZLE_VIEW_PIXEL(view, ax, ay);
+ } while (ay++ < ylimit); /* compare, then increment: the body also runs for ay == ylimit */
+ } while (ax++ < xlimit);
+ if (count <= 0U) {
+ return 0.0;
+ }
+ return (double) lvl / (double) count;
+}
+
+static double puzzle_get_avglvl(const PuzzleView * const view,
+ const unsigned int x, const unsigned int y,
+ const unsigned int width,
+ const unsigned int height)
+{ /* Mean of puzzle_softedgedlvl() over the width x height block whose
+   * lowest corner is (x, y). An empty block, a block whose bounds wrap
+   * around, or one that reaches outside the view triggers puzzle_err_bug().
+   */
+ double lvl = 0.0;
+ const unsigned int xlimit = x + width - 1U; /* inclusive upper bounds of the block */
+ const unsigned int ylimit = y + height - 1U;
+ unsigned int ax, ay;
+
+ if (width <= 0U || height <= 0U) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ if (xlimit < x || ylimit < y) { /* unsigned wrap-around */
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ ax = x;
+ do {
+ if (ax >= view->width) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ ay = y;
+ do {
+ if (ay >= view->height) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ lvl += puzzle_softedgedlvl(view, ax, ay);
+ } while (ay++ < ylimit); /* compare, then increment: the body also runs for ay == ylimit */
+ } while (ax++ < xlimit);
+
+ return lvl / (double) (width * height);
+}
+
+static int puzzle_fill_avglgls(PuzzleContext * const context,
+ PuzzleAvgLvls * const avglvls,
+ const PuzzleView * const view,
+ const unsigned int lambdas)
+{ /* Fill avglvls with a lambdas x lambdas grid of average grey levels
+   * sampled from view. avglvls->lvls is allocated here with calloc() and
+   * left for the caller to release.
+   * Returns 0 on success, -1 if calloc() fails.
+   * NOTE(review): lambdas is used as a divisor below, so it must be
+   * non-zero; presumably the setter enforces that - confirm.
+   */
+ double width = (double) view->width;
+ double height = (double) view->height;
+ double xshift, yshift;
+ double x, y;
+ unsigned int p;
+ unsigned int lx, ly;
+ unsigned int xd, yd;
+ unsigned int px, py;
+ unsigned int lwidth, lheight;
+ double avglvl;
+
+ avglvls->lambdas = lambdas;
+ avglvls->sizeof_lvls = (size_t) lambdas * lambdas;
+ if (UINT_MAX / lambdas < lambdas ||
+ (unsigned int) avglvls->sizeof_lvls != avglvls->sizeof_lvls) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ if ((avglvls->lvls = calloc(avglvls->sizeof_lvls,
+ sizeof *avglvls->lvls)) == NULL) {
+ return -1;
+ }
+ xshift = (width -
+ (width * (double) lambdas / (double) SUCC(lambdas))) / 2.0; /* half of what is left after taking lambdas / (lambdas + 1) of the extent */
+ yshift = (height -
+ (height * (double) lambdas / (double) SUCC(lambdas))) / 2.0;
+ p = (unsigned int) round(MIN(width, height) /
+ (SUCC(lambdas) * context->puzzle_p_ratio)); /* side of the square averaged around each grid point */
+ if (p < PUZZLE_MIN_P) {
+ p = PUZZLE_MIN_P;
+ }
+ lx = 0U;
+ do {
+ ly = 0U;
+ do {
+ x = xshift + (double) lx * PRED(width) / SUCC(lambdas); /* origin of grid cell (lx, ly) */
+ y = yshift + (double) ly * PRED(height) / SUCC(lambdas);
+ lwidth = (unsigned int) round
+ (xshift + (double) SUCC(lx) * PRED(width) /
+ (double) SUCC(lambdas) - x); /* distance to the origin of the next cell */
+ lheight = (unsigned int) round
+ (yshift + (double) SUCC(ly) * PRED(height) /
+ (double) SUCC(lambdas) - y);
+ if (p < lwidth) { /* centre the p-sized square inside the cell when it fits */
+ xd = (unsigned int) round(x + (lwidth - p) / 2.0);
+ } else {
+ xd = (unsigned int) round(x);
+ }
+ if (p < lheight) {
+ yd = (unsigned int) round(y + (lheight - p) / 2.0);
+ } else {
+ yd = (unsigned int) round(y);
+ }
+ if (view->width - xd < p) { /* square would cross the edge: fall back to one pixel */
+ px = 1U;
+ } else {
+ px = p;
+ }
+ if (view->height - yd < p) {
+ py = 1U;
+ } else {
+ py = p;
+ }
+ if (px > 0U && py > 0U) {
+ avglvl = puzzle_get_avglvl(view, xd, yd, px, py);
+ } else {
+ avglvl = 0.0;
+ }
+ PUZZLE_AVGLVL(avglvls, lx, ly) = avglvl;
+ } while (++ly < lambdas);
+ } while (++lx < lambdas);
+
+ return 0;
+}
+
+static unsigned int puzzle_add_neighbors(double ** const vecur,
+ const unsigned int max_neighbors,
+ const PuzzleAvgLvls * const avglvls,
+ const unsigned int lx,
+ const unsigned int ly)
+{ /* Append to **vecur, for every grid cell adjacent to (lx, ly), the
+   * difference between the level at (lx, ly) and the level of that
+   * neighbour. The neighbourhood is the 3 x 3 block around the cell,
+   * clipped to the grid, minus the cell itself. *vecur is advanced past
+   * the values written. max_neighbors must be 8.
+   * Returns the number of values written.
+   */
+ unsigned int ax, ay;
+ unsigned int xlimit, ylimit;
+ unsigned int neighbors = 0U;
+ const double ref = PUZZLE_AVGLVL(avglvls, lx, ly);
+
+ if (max_neighbors != 8U) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ if (lx >= avglvls->lambdas - 1U) { /* clip the block at the high edges of the grid */
+ xlimit = avglvls->lambdas - 1U;
+ } else {
+ xlimit = lx + 1U;
+ }
+ if (ly >= avglvls->lambdas - 1U) {
+ ylimit = avglvls->lambdas - 1U;
+ } else {
+ ylimit = ly + 1U;
+ }
+ if (lx <= 0U) { /* clip at the low edges */
+ ax = 0U;
+ } else {
+ ax = lx - 1U;
+ }
+ do {
+ if (ly <= 0U) {
+ ay = 0U;
+ } else {
+ ay = ly - 1U;
+ }
+ do {
+ if (ax == lx && ay == ly) {
+ continue; /* skip the cell itself; in a do/while this jumps to the condition, so ay still advances */
+ }
+ *(*vecur)++ = ref - PUZZLE_AVGLVL(avglvls, ax, ay);
+ neighbors++;
+ if (neighbors <= 0U) { /* counter wrapped */
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ } while (ay++ < ylimit);
+ } while (ax++ < xlimit);
+ if (neighbors > max_neighbors) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ return neighbors;
+}
+
+static int puzzle_fill_dvec(PuzzleDvec * const dvec,
+ const PuzzleAvgLvls * const avglvls)
+{ /* Fill dvec with the neighbour differences of every cell of the
+   * avglvls grid. dvec->vec is allocated here with calloc(), with room
+   * for PUZZLE_NEIGHBORS values per cell, and left for the caller to
+   * release. Returns 0 on success, -1 if calloc() fails.
+   */
+ unsigned int lambdas;
+ unsigned int lx, ly;
+ double *vecur;
+
+ lambdas = avglvls->lambdas;
+ dvec->sizeof_compressed_vec = (size_t) 0U;
+ dvec->sizeof_vec = (size_t) (lambdas * lambdas * PUZZLE_NEIGHBORS);
+ if (SIZE_MAX /
+ ((size_t) (lambdas * lambdas)) < (size_t) PUZZLE_NEIGHBORS || /* NOTE(review): divides by lambdas * lambdas, so lambdas must be non-zero - confirm the setter guarantees it */
+ (unsigned int) dvec->sizeof_vec != dvec->sizeof_vec) {
+ puzzle_err_bug(__FILE__, __LINE__);
+ }
+ if ((dvec->vec = calloc(dvec->sizeof_vec, sizeof *dvec->vec)) == NULL) {
+ return -1;
+ }
+ vecur = dvec->vec;
+ lx = 0U;
+ do {
+ ly = 0U;
+ do {
+ (void) puzzle_add_neighbors(&vecur, PUZZLE_NEIGHBORS,
+ avglvls, lx, ly); /* advances vecur past what it wrote */
+ } while (++ly < lambdas);
+ } while (++lx < lambdas);
+ dvec->sizeof_compressed_vec = (size_t) (vecur - dvec->vec); /* values actually written; cells on the grid border have fewer than 8 neighbours */
+
+ return 0;
+}
+
+/*
+ * Flatten gdimage onto an opaque white background.
+ * The image is first converted to true colour, then every pixel is replaced
+ * by the alpha blend of white with the pixel's current colour.
+ */
+static void puzzle_remove_transparency(gdImagePtr gdimage)
+{
+    const int white = gdTrueColor(255, 255, 255);
+    int row, col, src;
+
+    gdImagePaletteToTrueColor(gdimage);
+
+    row = 0;
+    while (row < gdImageSY(gdimage)) {
+        col = 0;
+        while (col < gdImageSX(gdimage)) {
+            src = gdImageGetTrueColorPixel(gdimage, col, row);
+            gdImageSetPixel(gdimage, col, row, gdAlphaBlend(white, src));
+            col++;
+        }
+        row++;
+    }
+}
+
+/*
+ * Open file, detect its format from the signature bytes and decode it with
+ * the matching GD loader (JPEG, PNG or GIF).
+ * Returns the decoded image, or NULL if the file cannot be opened, the
+ * format is not one of those three, or the loader returns NULL.
+ * The file is closed before returning.
+ */
+static gdImagePtr puzzle_create_gdimage_from_file(const char * const file)
+{
+    gdImagePtr image;
+    FILE * const stream = fopen(file, "rb");
+
+    if (stream == NULL) {
+        return NULL;
+    }
+    switch (puzzle_get_image_type_from_fp(stream)) {
+    case PUZZLE_IMAGE_TYPE_GIF:
+        image = gdImageCreateFromGif(stream);
+        break;
+    case PUZZLE_IMAGE_TYPE_PNG:
+        image = gdImageCreateFromPng(stream);
+        break;
+    case PUZZLE_IMAGE_TYPE_JPEG:
+        image = gdImageCreateFromJpeg(stream);
+        break;
+    default:
+        image = NULL;
+        break;
+    }
+    (void) fclose(stream);
+    return image;
+}
+
+/*
+ * Decode an encoded image held in the size bytes starting at mem, using the
+ * GD loader that matches its signature (JPEG, PNG or GIF).
+ *
+ * Returns the decoded image, or NULL when:
+ *  - mem is NULL;
+ *  - size is smaller than MAX_SIGNATURE_LENGTH: the signature probe compares
+ *    up to that many bytes, so a shorter buffer would be read past its end
+ *    (the stream-based probe likewise refuses inputs that short);
+ *  - size does not fit in the int the GD *Ptr loaders take, which would
+ *    otherwise be silently truncated;
+ *  - the format is not recognised or the loader returns NULL.
+ */
+static gdImagePtr puzzle_create_gdimage_from_mem(const void * const mem, const size_t size)
+{
+    gdImagePtr gdimage = NULL;
+
+    if (mem == NULL ||
+        size < (size_t) MAX_SIGNATURE_LENGTH ||
+        size > (size_t) INT_MAX) {
+        return NULL;
+    }
+    switch (puzzle_get_image_type_from_header(mem)) {
+    case PUZZLE_IMAGE_TYPE_JPEG:
+        gdimage = gdImageCreateFromJpegPtr((int) size, (void *) mem);
+        break;
+    case PUZZLE_IMAGE_TYPE_PNG:
+        gdimage = gdImageCreateFromPngPtr((int) size, (void *) mem);
+        break;
+    case PUZZLE_IMAGE_TYPE_GIF:
+        gdimage = gdImageCreateFromGifPtr((int) size, (void *) mem);
+        break;
+    default:
+        gdimage = NULL;
+        break;
+    }
+    return gdimage;
+}
+
+/*
+ * Compute dvec from an already decoded image.
+ *
+ * Steps: extract the grey-level view, optionally autocrop it (when
+ * context->puzzle_enable_autocrop is non-zero), sample the grid of average
+ * levels, then derive the neighbour differences. The intermediate view and
+ * level grid are always released before returning.
+ *
+ * A context whose magic field is not PUZZLE_CONTEXT_MAGIC is reported through
+ * puzzle_err_bug(). Returns 0 on success, otherwise the status of the step
+ * that failed.
+ */
+static int puzzle_fill_dvec_from_gdimage(PuzzleContext * const context,
+                                         PuzzleDvec * const dvec,
+                                         const gdImagePtr gdimage)
+{
+    PuzzleAvgLvls levels;
+    PuzzleView pixels;
+    int status;
+
+    if (context->magic != PUZZLE_CONTEXT_MAGIC) {
+        puzzle_err_bug(__FILE__, __LINE__);
+    }
+    puzzle_init_view(&pixels);
+    puzzle_init_avglvls(&levels);
+    puzzle_init_dvec(context, dvec);
+
+    status = puzzle_getview_from_gdimage(context, &pixels, gdimage);
+    if (status == 0) {
+        if (context->puzzle_enable_autocrop != 0) {
+            status = puzzle_autocrop_view(context, &pixels);
+        }
+        /* Only a negative autocrop status stops the pipeline. */
+        if (status >= 0) {
+            status = puzzle_fill_avglgls(context, &levels, &pixels,
+                                         context->puzzle_lambdas);
+            if (status == 0) {
+                status = puzzle_fill_dvec(dvec, &levels);
+            }
+        }
+    }
+    puzzle_free_view(&pixels);
+    puzzle_free_avglvls(&levels);
+
+    return status;
+}
+
+/*
+ * Compute dvec from the image stored in file.
+ * The image is decoded, flattened onto white to remove transparency, turned
+ * into a dvec and then destroyed.
+ * Returns -1 if the image cannot be loaded, otherwise the status of
+ * puzzle_fill_dvec_from_gdimage().
+ */
+int puzzle_fill_dvec_from_file(PuzzleContext * const context,
+                               PuzzleDvec * const dvec,
+                               const char * const file)
+{
+    gdImagePtr image;
+    int status = -1;
+
+    image = puzzle_create_gdimage_from_file(file);
+    if (image != NULL) {
+        puzzle_remove_transparency(image);
+        status = puzzle_fill_dvec_from_gdimage(context, dvec, image);
+        gdImageDestroy(image);
+    }
+    return status;
+}
+
+/*
+ * Compute dvec from an encoded image held in the size bytes starting at mem.
+ * The image is decoded, flattened onto white to remove transparency, turned
+ * into a dvec and then destroyed.
+ * Returns -1 if the image cannot be decoded, otherwise the status of
+ * puzzle_fill_dvec_from_gdimage().
+ */
+int puzzle_fill_dvec_from_mem(PuzzleContext * const context,
+                              PuzzleDvec * const dvec,
+                              const void * const mem,
+                              const size_t size)
+{
+    gdImagePtr image;
+    int status = -1;
+
+    image = puzzle_create_gdimage_from_mem(mem, size);
+    if (image != NULL) {
+        puzzle_remove_transparency(image);
+        status = puzzle_fill_dvec_from_gdimage(context, dvec, image);
+        gdImageDestroy(image);
+    }
+    return status;
+}
+
+/*
+ * Print the first sizeof_compressed_vec values of dvec to stdout with "%g",
+ * one per line. A count of zero is reported through puzzle_err_bug().
+ * context is not used. Always returns 0.
+ */
+int puzzle_dump_dvec(PuzzleContext * const context,
+                     const PuzzleDvec * const dvec)
+{
+    const double *cursor = dvec->vec;
+    const double *stop;
+
+    (void) context;
+    if (dvec->sizeof_compressed_vec == (size_t) 0U) {
+        puzzle_err_bug(__FILE__, __LINE__);
+    }
+    for (stop = cursor + dvec->sizeof_compressed_vec; cursor != stop; cursor++) {
+        printf("%g\n", *cursor);
+    }
+
+    return 0;
+}
diff --git a/deduper/libpuzzle/src/globals.h b/deduper/libpuzzle/src/globals.h
new file mode 100644
index 0000000..757c5c7
--- /dev/null
+++ b/deduper/libpuzzle/src/globals.h
@@ -0,0 +1,26 @@
+#ifndef __GLOBALS_H__
+#define __GLOBALS_H__ 1
+
+#ifdef DEFINE_GLOBALS /* set by the one translation unit that must own the definitions */
+# define GLOBAL0(A) A /* definition without an initializer */
+# define GLOBAL(A, B) A = B /* definition initialized with B */
+#else
+# define GLOBAL0(A) extern A /* every other includer only gets a declaration */
+# define GLOBAL(A, B) extern A /* the initializer B is dropped */
+#endif
+
+GLOBAL(PuzzleContext puzzle_global_context, /* default context values; _COMA_ stands for a comma, since a literal one would split the initializer into extra macro arguments */
+{
+ /* unsigned int puzzle_max_width */ PUZZLE_DEFAULT_MAX_WIDTH _COMA_
+ /* unsigned int puzzle_max_height */ PUZZLE_DEFAULT_MAX_HEIGHT _COMA_
+ /* unsigned int puzzle_lambdas */ PUZZLE_DEFAULT_LAMBDAS _COMA_
+ /* double puzzle_p_ratio */ PUZZLE_DEFAULT_P_RATIO _COMA_
+ /* double puzzle_noise_cutoff */ PUZZLE_DEFAULT_NOISE_CUTOFF _COMA_
+ /* double puzzle_contrast_barrier_for_cropping */
+ PUZZLE_DEFAULT_CONTRAST_BARRIER_FOR_CROPPING _COMA_
+ /* double puzzle_max_cropping_ratio */
+ PUZZLE_DEFAULT_MAX_CROPPING_RATIO _COMA_
+ /* int puzzle_enable_autocrop */ PUZZLE_DEFAULT_ENABLE_AUTOCROP _COMA_
+ /* unsigned long magic */ PUZZLE_CONTEXT_MAGIC _COMA_
+});
+#endif
diff --git a/deduper/libpuzzle/src/pics/Makefile.am b/deduper/libpuzzle/src/pics/Makefile.am
new file mode 100644
index 0000000..510311f
--- /dev/null
+++ b/deduper/libpuzzle/src/pics/Makefile.am
@@ -0,0 +1,8 @@
+EXTRA_DIST = \
+ pic-a-0.jpg \
+ pic-a-1.jpg \
+ luxmarket_tshirt01.jpg \
+ luxmarket_tshirt01_black.jpg \
+ luxmarket_tshirt01_sal.jpg \
+ luxmarket_tshirt01_sheum.jpg \
+ duck.gif
diff --git a/deduper/libpuzzle/src/pics/duck.gif b/deduper/libpuzzle/src/pics/duck.gif
new file mode 100644
index 0000000..96c3037
--- /dev/null
+++ b/deduper/libpuzzle/src/pics/duck.gif
Binary files differ
diff --git a/deduper/libpuzzle/src/pics/luxmarket_tshirt01.jpg b/deduper/libpuzzle/src/pics/luxmarket_tshirt01.jpg
new file mode 100644
index 0000000..ffaf7eb
--- /dev/null
+++ b/deduper/libpuzzle/src/pics/luxmarket_tshirt01.jpg
Binary files differ
diff --git a/deduper/libpuzzle/src/pics/luxmarket_tshirt01_black.jpg b/deduper/libpuzzle/src/pics/luxmarket_tshirt01_black.jpg
new file mode 100644
index 0000000..73cac7b
--- /dev/null
+++ b/deduper/libpuzzle/src/pics/luxmarket_tshirt01_black.jpg
Binary files differ
diff --git a/deduper/libpuzzle/src/pics/luxmarket_tshirt01_sal.jpg b/deduper/libpuzzle/src/pics/luxmarket_tshirt01_sal.jpg
new file mode 100644
index 0000000..cb0cefe
--- /dev/null
+++ b/deduper/libpuzzle/src/pics/luxmarket_tshirt01_sal.jpg
Binary files differ
diff --git a/deduper/libpuzzle/src/pics/luxmarket_tshirt01_sheum.jpg b/deduper/libpuzzle/src/pics/luxmarket_tshirt01_sheum.jpg
new file mode 100644
index 0000000..185393c
--- /dev/null
+++ b/deduper/libpuzzle/src/pics/luxmarket_tshirt01_sheum.jpg
Binary files differ
diff --git a/deduper/libpuzzle/src/pics/pic-a-0.jpg b/deduper/libpuzzle/src/pics/pic-a-0.jpg
new file mode 100644
index 0000000..3dd4a3b
--- /dev/null
+++ b/deduper/libpuzzle/src/pics/pic-a-0.jpg
Binary files differ
diff --git a/deduper/libpuzzle/src/pics/pic-a-1.jpg b/deduper/libpuzzle/src/pics/pic-a-1.jpg
new file mode 100644
index 0000000..95f0e77
--- /dev/null
+++ b/deduper/libpuzzle/src/pics/pic-a-1.jpg
Binary files differ
diff --git a/deduper/libpuzzle/src/puzzle-diff.c b/deduper/libpuzzle/src/puzzle-diff.c
new file mode 100644
index 0000000..e0f3626
--- /dev/null
+++ b/deduper/libpuzzle/src/puzzle-diff.c
@@ -0,0 +1,130 @@
+#include "puzzle_common.h"
+#include "puzzle.h"
+
+typedef struct Opts_ { /* command-line settings of puzzle-diff that are not stored in the PuzzleContext */
+ const char *file1; /* first positional argument */
+ const char *file2; /* second positional argument */
+ int fix_for_texts; /* 1 by default, cleared by -t; passed on to puzzle_vector_normalized_distance() */
+ int exit; /* set by -e: report the result through the exit status instead of printing the distance */
+ double similarity_threshold; /* set by -E; defaults to PUZZLE_CVEC_SIMILARITY_THRESHOLD */
+} Opts;
+
+/*
+ * Print the command-line help to stdout and terminate the process with
+ * EXIT_SUCCESS. This function does not return.
+ *
+ * Help text corrections: the -b placeholder is now closed, the -E
+ * placeholder is bracketed like the others, -H documents <height> rather
+ * than <width>, and -t has its separating colon.
+ */
+void usage(void)
+{
+    puts("\nUsage: puzzle-diff [-b <contrast barrier for cropping>] [-c]\n"
+         " [-C <max cropping ratio>] [-e] [-E <similarity threshold>] [-h]\n"
+         " [-H <max height>] [-l <lambdas>] [-n <noise cutoff>]\n"
+         " [-p <p ratio>] [-t] [-W <max width>] <file 1> <file 2>\n\n"
+         "Visually compares two images and returns their distance.\n\n"
+         "-b <contrast barrier for cropping>\n"
+         "-c : disable autocrop\n"
+         "-C <max cropping ratio>\n"
+         "-e : exit with 10 (images are similar) or 20 (images are not)\n"
+         "-E <similarity threshold> : for -e\n"
+         "-h : show help\n"
+         "-H <height> : set max height\n"
+         "-l <lambdas> : change lambdas\n"
+         "-n <noise cutoff> : change noise cutoff\n"
+         "-p <ratio> : set p ratio\n"
+         "-t : disable fix for texts\n"
+         "-W <width> : set max width\n"
+         "\n");
+    exit(EXIT_SUCCESS);
+}
+
+int parse_opts(Opts * const opts, PuzzleContext * context,
+ int argc, char * const *argv) { /* Parse the command line with getopt(): tunables are applied directly to context, everything else is stored in opts. Exactly two file operands must remain, otherwise usage() is called (it does not return). Always returns 0. */
+ int opt;
+ extern char *optarg;
+ extern int optind;
+
+ opts->fix_for_texts = 1; /* defaults for the settings kept in opts */
+ opts->exit = 0;
+ opts->similarity_threshold = PUZZLE_CVEC_SIMILARITY_THRESHOLD;
+ while ((opt = getopt(argc, argv, "b:cC:eE:hH:l:n:p:tW:")) != -1) {
+ switch (opt) {
+ case 'b':
+ puzzle_set_contrast_barrier_for_cropping(context, atof(optarg)); /* setter results are not checked anywhere in this switch */
+ break;
+ case 'c':
+ puzzle_set_autocrop(context, 0);
+ break;
+ case 'C':
+ puzzle_set_max_cropping_ratio(context, atof(optarg));
+ break;
+ case 'e':
+ opts->exit = 1;
+ break;
+ case 'E':
+ opts->similarity_threshold = atof(optarg);
+ break;
+ case 'h':
+ usage();
+ /* NOTREACHED */
+ case 'H':
+ puzzle_set_max_height(context, strtoul(optarg, NULL, 10)); /* the unsigned long result is converted to the setter's unsigned int parameter */
+ break;
+ case 'l':
+ puzzle_set_lambdas(context, strtoul(optarg, NULL, 10));
+ break;
+ case 'n':
+ puzzle_set_noise_cutoff(context, atof(optarg));
+ break;
+ case 'p':
+ puzzle_set_p_ratio(context, atof(optarg));
+ break;
+ case 't':
+ opts->fix_for_texts = 0;
+ break;
+ case 'W':
+ puzzle_set_max_width(context, strtoul(optarg, NULL, 10));
+ break;
+ default:
+ usage();
+ /* NOTREACHED */
+ }
+ }
+ argc -= optind; /* skip the options that were consumed */
+ argv += optind;
+ if (argc != 2) {
+ usage();
+ }
+ opts->file1 = *argv++;
+ opts->file2 = *argv;
+
+ return 0;
+}
+
+/*
+ * puzzle-diff entry point: compute the normalized distance between the two
+ * images named on the command line.
+ *
+ * Exit status:
+ *   0   the distance was printed to stdout (default mode);
+ *   1   one of the files could not be read;
+ *   10  -e was given and the distance does not exceed the threshold;
+ *   20  -e was given and the distance exceeds the threshold.
+ *
+ * Every path, including the read failures, goes through the same cleanup so
+ * both vectors and the context are always released.
+ */
+int main(int argc, char *argv[])
+{
+    Opts opts;
+    PuzzleContext context;
+    PuzzleCvec cvec1, cvec2;
+    double d;
+    int status = 1;
+
+    puzzle_init_context(&context);
+    parse_opts(&opts, &context, argc, argv);
+    puzzle_init_cvec(&context, &cvec1);
+    puzzle_init_cvec(&context, &cvec2);
+    if (puzzle_fill_cvec_from_file(&context, &cvec1, opts.file1) != 0) {
+        fprintf(stderr, "Unable to read [%s]\n", opts.file1);
+        goto cleanup;
+    }
+    if (puzzle_fill_cvec_from_file(&context, &cvec2, opts.file2) != 0) {
+        fprintf(stderr, "Unable to read [%s]\n", opts.file2);
+        goto cleanup;
+    }
+    d = puzzle_vector_normalized_distance(&context, &cvec1, &cvec2,
+                                          opts.fix_for_texts);
+    if (opts.exit == 0) {
+        printf("%g\n", d);
+        status = 0;
+    } else if (d > opts.similarity_threshold) {
+        status = 20;
+    } else {
+        status = 10;
+    }
+cleanup:
+    puzzle_free_cvec(&context, &cvec1);
+    puzzle_free_cvec(&context, &cvec2);
+    puzzle_free_context(&context);
+
+    return status;
+}
diff --git a/deduper/libpuzzle/src/puzzle.c b/deduper/libpuzzle/src/puzzle.c
new file mode 100644
index 0000000..e21c252
--- /dev/null
+++ b/deduper/libpuzzle/src/puzzle.c
@@ -0,0 +1,22 @@
+#define DEFINE_GLOBALS 1
+#include "puzzle_common.h"
+#include "puzzle_p.h"
+#include "puzzle.h"
+#include "globals.h"
+
+/* Initialize *context as a copy of the library defaults in puzzle_global_context. */
+void puzzle_init_context(PuzzleContext * const context)
+{
+    const PuzzleContext * const defaults = &puzzle_global_context;
+
+    *context = *defaults;
+}
+
+/* Counterpart of puzzle_init_context(); it currently does nothing. */
+void puzzle_free_context(PuzzleContext * const context)
+{
+    (void) context; /* nothing to release */
+}
+
+/*
+ * Report a failed internal consistency check and terminate the process.
+ * Writes the source location to stderr, then calls abort(); it never returns.
+ */
+void puzzle_err_bug(const char * const file, const int line)
+{
+    (void) fprintf(stderr, "*BUG* File: [%s] Line: [%d]\n", file, line);
+    abort();
+}
+
diff --git a/deduper/libpuzzle/src/puzzle.h b/deduper/libpuzzle/src/puzzle.h
new file mode 100644
index 0000000..c31b43f
--- /dev/null
+++ b/deduper/libpuzzle/src/puzzle.h
@@ -0,0 +1,122 @@
+#ifndef __PUZZLE_H__
+#define __PUZZLE_H__ 1
+
+#define PUZZLE_VERSION_MAJOR 0
+#define PUZZLE_VERSION_MINOR 11
+
+#include "puzzle_common.h"
+
+typedef struct PuzzleDvec_ { /* vector of double values computed from an image */
+ size_t sizeof_vec; /* number of elements allocated in vec */
+ size_t sizeof_compressed_vec; /* number of elements actually written to vec */
+ double *vec; /* heap buffer, released by puzzle_free_dvec() */
+} PuzzleDvec;
+
+typedef struct PuzzleCvec_ { /* vector of signed char values, filled from a PuzzleDvec */
+ size_t sizeof_vec; /* number of elements in vec */
+ signed char *vec; /* heap buffer, released by puzzle_free_cvec() */
+} PuzzleCvec;
+
+typedef struct PuzzleCompressedCvec_ { /* compressed form of a PuzzleCvec, see puzzle_compress_cvec() */
+ size_t sizeof_compressed_vec; /* presumably the number of bytes in vec - TODO confirm */
+ unsigned char *vec;
+} PuzzleCompressedCvec;
+
+typedef struct PuzzleContext_ { /* tunables used while computing vectors; set up by puzzle_init_context() */
+ unsigned int puzzle_max_width; /* wider images are rejected */
+ unsigned int puzzle_max_height; /* taller images are rejected */
+ unsigned int puzzle_lambdas; /* side of the square grid of sampled average levels */
+ double puzzle_p_ratio; /* divisor used when sizing the square sampled around each grid point */
+ double puzzle_noise_cutoff; /* set with puzzle_set_noise_cutoff() */
+ double puzzle_contrast_barrier_for_cropping; /* fraction of the total contrast an autocrop scan accumulates before stopping */
+ double puzzle_max_cropping_ratio; /* upper bound on the fraction of an axis removed from either end */
+ int puzzle_enable_autocrop; /* non-zero: autocrop before sampling */
+ unsigned long magic; /* must equal PUZZLE_CONTEXT_MAGIC */
+} PuzzleContext;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void puzzle_init_context(PuzzleContext * const context);
+void puzzle_free_context(PuzzleContext * const context);
+int puzzle_set_max_width(PuzzleContext * const context,
+ const unsigned int width);
+int puzzle_set_max_height(PuzzleContext * const context,
+ const unsigned int height);
+int puzzle_set_lambdas(PuzzleContext * const context,
+ const unsigned int lambdas);
+int puzzle_set_noise_cutoff(PuzzleContext * const context,
+ const double noise_cutoff);
+int puzzle_set_p_ratio(PuzzleContext * const context,
+ const double p_ratio);
+int puzzle_set_contrast_barrier_for_cropping(PuzzleContext * const context,
+ const double barrier);
+int puzzle_set_max_cropping_ratio(PuzzleContext * const context,
+ const double ratio);
+int puzzle_set_autocrop(PuzzleContext * const context,
+ const int enable);
+void puzzle_init_cvec(PuzzleContext * const context,
+ PuzzleCvec * const cvec);
+void puzzle_init_dvec(PuzzleContext * const context,
+ PuzzleDvec * const dvec);
+int puzzle_fill_dvec_from_file(PuzzleContext * const context,
+ PuzzleDvec * const dvec,
+ const char * const file);
+int puzzle_fill_cvec_from_file(PuzzleContext * const context,
+ PuzzleCvec * const cvec,
+ const char * const file);
+int puzzle_fill_dvec_from_mem(PuzzleContext * const context,
+ PuzzleDvec * const dvec,
+ const void * const mem,
+ const size_t size);
+int puzzle_fill_cvec_from_mem(PuzzleContext * const context,
+ PuzzleCvec * const cvec,
+ const void * const mem,
+ const size_t size);
+int puzzle_fill_cvec_from_dvec(PuzzleContext * const context,
+ PuzzleCvec * const cvec,
+ const PuzzleDvec * const dvec);
+void puzzle_free_cvec(PuzzleContext * const context,
+ PuzzleCvec * const cvec);
+void puzzle_free_dvec(PuzzleContext * const context,
+ PuzzleDvec * const dvec);
+int puzzle_dump_cvec(PuzzleContext * const context,
+ const PuzzleCvec * const cvec);
+int puzzle_dump_dvec(PuzzleContext * const context,
+ const PuzzleDvec * const dvec);
+int puzzle_cvec_cksum(PuzzleContext * const context,
+ const PuzzleCvec * const cvec, unsigned int * const sum);
+void puzzle_init_compressed_cvec(PuzzleContext * const context,
+ PuzzleCompressedCvec * const compressed_cvec);
+void puzzle_free_compressed_cvec(PuzzleContext * const context,
+ PuzzleCompressedCvec * const compressed_cvec);
+int puzzle_compress_cvec(PuzzleContext * const context,
+ PuzzleCompressedCvec * const compressed_cvec,
+ const PuzzleCvec * const cvec);
+int puzzle_uncompress_cvec(PuzzleContext * const context,
+ const PuzzleCompressedCvec * const compressed_cvec,
+ PuzzleCvec * const cvec);
+int puzzle_vector_sub(PuzzleContext * const context,
+ PuzzleCvec * const cvecr,
+ const PuzzleCvec * const cvec1,
+ const PuzzleCvec * const cvec2,
+ const int fix_for_texts);
+double puzzle_vector_euclidean_length(PuzzleContext * const context,
+ const PuzzleCvec * const cvec);
+double puzzle_vector_normalized_distance(PuzzleContext * const context,
+ const PuzzleCvec * const cvec1,
+ const PuzzleCvec * const cvec2,
+ const int fix_for_texts);
+
+#ifdef __cplusplus
+}
+#endif
+
+#define PUZZLE_CVEC_SIMILARITY_THRESHOLD 0.6
+#define PUZZLE_CVEC_SIMILARITY_HIGH_THRESHOLD 0.7
+#define PUZZLE_CVEC_SIMILARITY_LOW_THRESHOLD 0.3
+#define PUZZLE_CVEC_SIMILARITY_LOWER_THRESHOLD 0.2
+
+#define _COMA_ ,
+
+#endif
diff --git a/deduper/libpuzzle/src/puzzle_common.h b/deduper/libpuzzle/src/puzzle_common.h
new file mode 100644
index 0000000..ebd340b
--- /dev/null
+++ b/deduper/libpuzzle/src/puzzle_common.h
@@ -0,0 +1,18 @@
+#ifndef __PUZZLE_COMMON_H__
+#define __PUZZLE_COMMON_H__ 1
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#ifndef errno
+extern int errno;
+#endif
+
+#endif
diff --git a/deduper/libpuzzle/src/puzzle_p.h b/deduper/libpuzzle/src/puzzle_p.h
new file mode 100644
index 0000000..2f09494
--- /dev/null
+++ b/deduper/libpuzzle/src/puzzle_p.h
@@ -0,0 +1,67 @@
+#ifndef __PUZZLE_P_H__
+#define __PUZZLE_P_H__ 1
+
+#include <math.h>
+#include <gd.h>
+
+typedef struct PuzzleView_ { /* grey-level pixel map, indexed through PUZZLE_VIEW_PIXEL() */
+ unsigned int width;
+ unsigned int height;
+ size_t sizeof_map; /* width * height */
+ unsigned char *map; /* one byte per pixel; heap buffer */
+} PuzzleView;
+
+typedef struct PuzzleAvgLvls_ { /* square grid of average levels, indexed through PUZZLE_AVGLVL() */
+ unsigned int lambdas; /* side of the grid */
+ size_t sizeof_lvls; /* lambdas * lambdas */
+ double *lvls; /* heap buffer */
+} PuzzleAvgLvls;
+
+typedef enum PuzzleImageTypeCode_ { /* result of the signature probe; ERROR means the probe itself failed */
+ PUZZLE_IMAGE_TYPE_ERROR, PUZZLE_IMAGE_TYPE_UNKNOWN, PUZZLE_IMAGE_TYPE_JPEG,
+ PUZZLE_IMAGE_TYPE_GIF, PUZZLE_IMAGE_TYPE_PNG
+} PuzzleImageTypeCode;
+
+typedef struct PuzzleImageType_ { /* one entry of the signature table */
+ const size_t sizeof_signature; /* number of bytes compared */
+ const unsigned char *signature; /* leading bytes of the format; NULL in the terminating entry */
+ const PuzzleImageTypeCode image_type_code; /* code reported on a match */
+} PuzzleImageType;
+
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#define PUZZLE_DEFAULT_LAMBDAS 9
+#define PUZZLE_DEFAULT_MAX_WIDTH 3000
+#define PUZZLE_DEFAULT_MAX_HEIGHT 3000
+#define PUZZLE_DEFAULT_NOISE_CUTOFF 2.0
+#define PUZZLE_DEFAULT_P_RATIO 2.0
+#define PUZZLE_MIN_P 2 /* lower bound for the side of the sampled square */
+#define PUZZLE_PIXEL_FUZZ_SIZE 1 /* pixels added on each side of a pixel when soft-edging */
+#define PUZZLE_NEIGHBORS 8 /* values reserved per grid cell */
+#define PUZZLE_MIN_SIZE_FOR_CROPPING 100 /* an axis shorter than this is never cropped */
+#if PUZZLE_MIN_SIZE_FOR_CROPPING < 4
+# error PUZZLE_MIN_SIZE_FOR_CROPPING
+#endif
+#define PUZZLE_DEFAULT_CONTRAST_BARRIER_FOR_CROPPING 0.05
+#define PUZZLE_DEFAULT_MAX_CROPPING_RATIO 0.25
+#define PUZZLE_DEFAULT_ENABLE_AUTOCROP 1
+
+#define PUZZLE_VIEW_PIXEL(V, X, Y) (*((V)->map + (V)->width * (Y) + (X))) /* lvalue; element at offset width * Y + X */
+#define PUZZLE_AVGLVL(A, X, Y) (*((A)->lvls + (A)->lambdas * (Y) + (X))) /* lvalue; element at offset lambdas * Y + X */
+
+#define PUZZLE_CONTEXT_MAGIC 0xdeadbeef /* expected value of PuzzleContext.magic */
+
+#ifndef MIN
+# define MIN(A, B) ((A) < (B) ? (A) : (B)) /* evaluates one argument twice: avoid side effects */
+#endif
+#ifndef MAX
+# define MAX(A, B) ((A) > (B) ? (A) : (B)) /* evaluates one argument twice: avoid side effects */
+#endif
+#define SUCC(A) ((A) + 1)
+#define PRED(A) ((A) - 1)
+
+void puzzle_err_bug(const char * const file, const int line);
+
+#endif
diff --git a/deduper/libpuzzle/src/regress_1.c b/deduper/libpuzzle/src/regress_1.c
new file mode 100644
index 0000000..80462b8
--- /dev/null
+++ b/deduper/libpuzzle/src/regress_1.c
@@ -0,0 +1,32 @@
+#include "puzzle_common.h"
+#include "puzzle.h"
+
+#define EXPECTED_RESULT 111444570
+
+/*
+ * Regression test: compute the cvec of a sample picture, run it through
+ * compress / uncompress, and compare the checksum of the result with
+ * EXPECTED_RESULT.
+ *
+ * Returns 0 when the checksum matches and 1 otherwise. If the sample picture
+ * cannot be loaded, a message is printed and the process exits with status 0.
+ */
+int main(void)
+{
+    static const char sample[] = "pics/luxmarket_tshirt01.jpg";
+    PuzzleContext context;
+    PuzzleCompressedCvec packed;
+    PuzzleCvec cvec;
+    unsigned int sum;
+
+    puzzle_init_context(&context);
+    puzzle_init_compressed_cvec(&context, &packed);
+    puzzle_init_cvec(&context, &cvec);
+    if (puzzle_fill_cvec_from_file(&context, &cvec, sample) != 0) {
+        fprintf(stderr, "File not found\n");
+        exit(0);
+    }
+
+    /* Round trip: compress, drop the original, then rebuild it. */
+    puzzle_compress_cvec(&context, &packed, &cvec);
+    puzzle_free_cvec(&context, &cvec);
+    puzzle_init_cvec(&context, &cvec);
+    puzzle_uncompress_cvec(&context, &packed, &cvec);
+
+    puzzle_cvec_cksum(&context, &cvec, &sum);
+    puzzle_free_cvec(&context, &cvec);
+    puzzle_free_compressed_cvec(&context, &packed);
+    puzzle_free_context(&context);
+    printf("%u %u\n", sum, (unsigned int) EXPECTED_RESULT);
+
+    return (sum == EXPECTED_RESULT) ? 0 : 1;
+}
diff --git a/deduper/libpuzzle/src/regress_2.c b/deduper/libpuzzle/src/regress_2.c
new file mode 100644
index 0000000..a37b626
--- /dev/null
+++ b/deduper/libpuzzle/src/regress_2.c
@@ -0,0 +1,72 @@
+#include "puzzle_common.h"
+#include "puzzle.h"
+
+/*
+ * Regression test 2: distances between a reference picture (index 0) and
+ * five other pictures.
+ *
+ * Exit status: 1 if the distance is not symmetric to two decimals, 2 if a
+ * picture lands on the wrong side of PUZZLE_CVEC_SIMILARITY_THRESHOLD,
+ * 0 otherwise (also 0 when a picture cannot be loaded).
+ */
+int main(void)
+{
+    static const char * const pictures[6] = {
+        "pics/luxmarket_tshirt01.jpg",
+        "pics/luxmarket_tshirt01_black.jpg",
+        "pics/luxmarket_tshirt01_sal.jpg",
+        "pics/luxmarket_tshirt01_sheum.jpg",
+        "pics/duck.gif",
+        "pics/pic-a-0.jpg"
+    };
+    static const char * const missing[6] = {
+        "File 1 not found\n",
+        "File 2 not found\n",
+        "File 3 not found\n",
+        "File 4 not found\n",
+        "File 5 not found\n",
+        "File 6 not found\n"
+    };
+    PuzzleContext context;
+    PuzzleCvec cvec[6];
+    double d[6];
+    int i;
+
+    puzzle_init_context(&context);
+    for (i = 0; i < 6; i++) {
+        puzzle_init_cvec(&context, &cvec[i]);
+    }
+    for (i = 0; i < 6; i++) {
+        if (puzzle_fill_cvec_from_file(&context, &cvec[i], pictures[i]) != 0) {
+            fputs(missing[i], stderr);
+            exit(0);
+        }
+    }
+    /* d[0] and d[1] are the same pair with the operands swapped. */
+    d[0] = puzzle_vector_normalized_distance(&context, &cvec[1], &cvec[0], 1);
+    for (i = 1; i < 6; i++) {
+        d[i] = puzzle_vector_normalized_distance(&context, &cvec[0],
+                                                 &cvec[i], 1);
+    }
+    printf("%g %g %g %g %g %g\n", d[0], d[1], d[2], d[3], d[4], d[5]);
+    for (i = 0; i < 6; i++) {
+        puzzle_free_cvec(&context, &cvec[i]);
+    }
+    puzzle_free_context(&context);
+    if ((int) (d[0] * 100.0) != (int) (d[1] * 100.0)) {
+        return 1;
+    }
+    /* The first three comparisons must be "similar", the last two "different". */
+    if (d[0] > PUZZLE_CVEC_SIMILARITY_THRESHOLD ||
+        d[2] > PUZZLE_CVEC_SIMILARITY_THRESHOLD ||
+        d[3] > PUZZLE_CVEC_SIMILARITY_THRESHOLD ||
+        d[4] < PUZZLE_CVEC_SIMILARITY_THRESHOLD ||
+        d[5] < PUZZLE_CVEC_SIMILARITY_THRESHOLD) {
+        return 2;
+    }
+    return 0;
+}
diff --git a/deduper/libpuzzle/src/regress_3.c b/deduper/libpuzzle/src/regress_3.c
new file mode 100644
index 0000000..33698ba
--- /dev/null
+++ b/deduper/libpuzzle/src/regress_3.c
@@ -0,0 +1,35 @@
+#include "puzzle_common.h"
+#include "puzzle.h"
+
+#define PUZZLE_VECTOR_SLICE 0.6
+
+/*
+ * Regression test 3: two pictures must stay within PUZZLE_VECTOR_SLICE of
+ * each other both with and without the fix_for_texts adjustment.
+ *
+ * Exit status: 2 if either distance exceeds the limit, 0 otherwise (also 0
+ * when a picture cannot be loaded).
+ */
+int main(void)
+{
+    static const char * const pictures[2] = {
+        "pics/pic-a-0.jpg",
+        "pics/pic-a-1.jpg"
+    };
+    static const char * const missing[2] = {
+        "File 1 not found\n",
+        "File 2 not found\n"
+    };
+    PuzzleContext context;
+    PuzzleCvec cvec[2];
+    double with_fix, without_fix;
+    int i;
+
+    puzzle_init_context(&context);
+    for (i = 0; i < 2; i++) {
+        puzzle_init_cvec(&context, &cvec[i]);
+    }
+    for (i = 0; i < 2; i++) {
+        if (puzzle_fill_cvec_from_file(&context, &cvec[i], pictures[i]) != 0) {
+            fputs(missing[i], stderr);
+            exit(0);
+        }
+    }
+    with_fix = puzzle_vector_normalized_distance(&context, &cvec[0],
+                                                 &cvec[1], 1);
+    without_fix = puzzle_vector_normalized_distance(&context, &cvec[0],
+                                                    &cvec[1], 0);
+    printf("%g %g\n", with_fix, without_fix);
+    for (i = 0; i < 2; i++) {
+        puzzle_free_cvec(&context, &cvec[i]);
+    }
+    puzzle_free_context(&context);
+    if (with_fix > PUZZLE_VECTOR_SLICE || without_fix > PUZZLE_VECTOR_SLICE) {
+        return 2;
+    }
+    return 0;
+}
diff --git a/deduper/libpuzzle/src/tunables.c b/deduper/libpuzzle/src/tunables.c
new file mode 100644
index 0000000..280dfb2
--- /dev/null
+++ b/deduper/libpuzzle/src/tunables.c
@@ -0,0 +1,84 @@
+#include "puzzle_common.h"
+#include "puzzle_p.h"
+#include "puzzle.h"
+#include "globals.h"
+
+/*
+ * Store `width` in context->puzzle_max_width.
+ * Returns 0 on success, or -1 (context untouched) when width is zero.
+ */
+int puzzle_set_max_width(PuzzleContext * const context,
+                         const unsigned int width)
+{
+    if (width != 0U) {
+        context->puzzle_max_width = width;
+        return 0;
+    }
+    return -1;
+}
+
+/*
+ * Store `height` in context->puzzle_max_height.
+ * Returns 0 on success, or -1 (context untouched) when height is zero.
+ */
+int puzzle_set_max_height(PuzzleContext * const context,
+                          const unsigned int height)
+{
+    if (height != 0U) {
+        context->puzzle_max_height = height;
+        return 0;
+    }
+    return -1;
+}
+
+/*
+ * Store `lambdas` in context->puzzle_lambdas.
+ * Returns 0 on success, or -1 (context untouched) when lambdas is zero.
+ */
+int puzzle_set_lambdas(PuzzleContext * const context,
+                       const unsigned int lambdas)
+{
+    if (lambdas != 0U) {
+        context->puzzle_lambdas = lambdas;
+        return 0;
+    }
+    return -1;
+}
+
+/*
+ * Store `p_ratio` in context->puzzle_p_ratio.
+ * Returns 0 on success, or -1 (context untouched) when p_ratio is below
+ * 1.0 or is NaN.
+ */
+int puzzle_set_p_ratio(PuzzleContext * const context, const double p_ratio)
+{
+    /* Written as a negated >= so that NaN, which fails every ordered
+     * comparison, is rejected instead of slipping past a `< 1.0` test. */
+    if (!(p_ratio >= 1.0)) {
+        return -1;
+    }
+    context->puzzle_p_ratio = p_ratio;
+
+    return 0;
+}
+
+/*
+ * Store `noise_cutoff` in context->puzzle_noise_cutoff.
+ * No validation is applied; the function always returns 0.
+ */
+int puzzle_set_noise_cutoff(PuzzleContext * const context,
+                            const double noise_cutoff)
+{
+    context->puzzle_noise_cutoff = noise_cutoff;
+    return 0;
+}
+
+/*
+ * Store `barrier` in context->puzzle_contrast_barrier_for_cropping.
+ * Returns 0 on success, or -1 (context untouched) when barrier is not
+ * strictly positive or is NaN.
+ */
+int puzzle_set_contrast_barrier_for_cropping(PuzzleContext * const context,
+                                             const double barrier)
+{
+    /* Negated > so that NaN is rejected as well as zero and negatives. */
+    if (!(barrier > 0.0)) {
+        return -1;
+    }
+    context->puzzle_contrast_barrier_for_cropping = barrier;
+
+    return 0;
+}
+
+/*
+ * Store `ratio` in context->puzzle_max_cropping_ratio.
+ * Returns 0 on success, or -1 (context untouched) when ratio is not
+ * strictly positive or is NaN.
+ */
+int puzzle_set_max_cropping_ratio(PuzzleContext * const context,
+                                  const double ratio)
+{
+    /* Negated > so that NaN is rejected as well as zero and negatives. */
+    if (!(ratio > 0.0)) {
+        return -1;
+    }
+    context->puzzle_max_cropping_ratio = ratio;
+
+    return 0;
+}
+
+/*
+ * Switch automatic cropping on or off: any non-zero `enable` is stored as
+ * 1 in context->puzzle_enable_autocrop, zero is stored as 0.
+ * Always returns 0.
+ */
+int puzzle_set_autocrop(PuzzleContext * const context, const int enable)
+{
+    context->puzzle_enable_autocrop = enable ? 1 : 0;
+    return 0;
+}
diff --git a/deduper/libpuzzle/src/vector_ops.c b/deduper/libpuzzle/src/vector_ops.c
new file mode 100644
index 0000000..4fad5bf
--- /dev/null
+++ b/deduper/libpuzzle/src/vector_ops.c
@@ -0,0 +1,95 @@
+#include "puzzle_common.h"
+#include "puzzle_p.h"
+#include "puzzle.h"
+#include "globals.h"
+
+/*
+ * Element-wise difference cvecr = cvec1 - cvec2.
+ *
+ * cvecr must not own a buffer yet (its vec must be NULL); a buffer of the
+ * operands' size is allocated for it here. The operands must have the
+ * same, non-zero size. Violations are reported through puzzle_err_bug().
+ *
+ * With fix_for_texts != 0 a difference between 0 and +/-2 is stored as
+ * +/-3 (sign taken from the non-zero operand) instead of the plain
+ * difference.
+ *
+ * Returns 0 on success, -1 if the result buffer cannot be allocated.
+ */
+int puzzle_vector_sub(PuzzleContext * const context,
+                      PuzzleCvec * const cvecr,
+                      const PuzzleCvec * const cvec1,
+                      const PuzzleCvec * const cvec2,
+                      const int fix_for_texts)
+{
+    const size_t count = cvec1->sizeof_vec;
+    size_t i;
+
+    (void) context;
+    if (count != cvec2->sizeof_vec || count <= (size_t) 0U) {
+        puzzle_err_bug(__FILE__, __LINE__);
+    }
+    if (cvecr->vec != NULL) {
+        puzzle_err_bug(__FILE__, __LINE__);
+    }
+    cvecr->sizeof_vec = count;
+    cvecr->vec = calloc(cvecr->sizeof_vec, sizeof *cvecr->vec);
+    if (cvecr->vec == NULL) {
+        return -1;
+    }
+    if (fix_for_texts == 0) {
+        for (i = (size_t) 0U; i < count; i++) {
+            cvecr->vec[i] = cvec1->vec[i] - cvec2->vec[i];
+        }
+        return 0;
+    }
+    for (i = (size_t) 0U; i < count; i++) {
+        const signed char a = cvec1->vec[i];
+        const signed char b = cvec2->vec[i];
+        signed char diff;
+
+        if ((a == 0 && b == -2) || (a == -2 && b == 0)) {
+            diff = -3;
+        } else if ((a == 0 && b == +2) || (a == +2 && b == 0)) {
+            diff = +3;
+        } else {
+            diff = a - b;
+        }
+        cvecr->vec[i] = diff;
+    }
+    return 0;
+}
+
+/*
+ * Euclidean norm of a vector: the square root of the sum of its squared
+ * elements. An empty vector yields 0.0. puzzle_err_bug() is called if the
+ * running sum would exceed ULONG_MAX.
+ */
+double puzzle_vector_euclidean_length(PuzzleContext * const context,
+                                      const PuzzleCvec * const cvec)
+{
+    unsigned long total = 0U;
+    size_t i;
+
+    (void) context;
+    /* Walk from the last element down to the first. */
+    for (i = cvec->sizeof_vec; i > (size_t) 0U; i--) {
+        const int value = (int) cvec->vec[i - 1U];
+        const unsigned long square = (unsigned long) (value * value);
+
+        if (square > ULONG_MAX - total) {
+            puzzle_err_bug(__FILE__, __LINE__);
+        }
+        total += square;
+    }
+    return sqrt((double) total);
+}
+
+/*
+ * Normalized distance between two signatures:
+ *
+ *     |cvec1 - cvec2| / (|cvec1| + |cvec2|)
+ *
+ * where |.| is puzzle_vector_euclidean_length() and the difference is
+ * computed by puzzle_vector_sub() (fix_for_texts is forwarded to it).
+ *
+ * Returns 0.0 when both vectors have zero length. If the temporary
+ * difference vector cannot be allocated, HUGE_VAL is returned: it is
+ * larger than any real distance, so threshold comparisons treat the pair
+ * as dissimilar instead of the function dereferencing a NULL buffer.
+ */
+double puzzle_vector_normalized_distance(PuzzleContext * const context,
+                                         const PuzzleCvec * const cvec1,
+                                         const PuzzleCvec * const cvec2,
+                                         const int fix_for_texts)
+{
+    PuzzleCvec cvecr;
+    double dt, dr;
+
+    puzzle_init_cvec(context, &cvecr);
+    if (puzzle_vector_sub(context, &cvecr, cvec1, cvec2, fix_for_texts) != 0) {
+        /* NOTE(review): assumes puzzle_free_cvec() accepts a vector whose
+         * buffer is still NULL -- confirm. */
+        puzzle_free_cvec(context, &cvecr);
+        return HUGE_VAL;
+    }
+    dt = puzzle_vector_euclidean_length(context, &cvecr);
+    puzzle_free_cvec(context, &cvecr);
+    dr = puzzle_vector_euclidean_length(context, cvec1)
+        + puzzle_vector_euclidean_length(context, cvec2);
+    if (dr == 0.0) {
+        return 0.0;
+    }
+    return dt / dr;
+}
diff --git a/deduper/thread_pool.h b/deduper/thread_pool.h
new file mode 100644
index 0000000..ee661ce
--- /dev/null
+++ b/deduper/thread_pool.h
@@ -0,0 +1,127 @@
+#ifndef THREAD_POOL_H
+#define THREAD_POOL_H
+
+#include <atomic>
+#include <condition_variable>
+#include <cstddef>
+#include <functional>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <utility>
+#include <vector>
+
+/**
+ * Minimal mutex-guarded FIFO queue.
+ *
+ * Every operation takes the internal mutex for its whole duration, so
+ * individual calls may be issued from several threads. A sequence of
+ * calls (e.g. size() followed by pop()) is NOT atomic as a whole.
+ */
+template<typename T>
+class _atomic_queue
+{
+public:
+    /// Appends a copy of v. Accepts const values as well as mutable lvalues.
+    void push(const T&v)
+    {
+        std::lock_guard<std::mutex> lck(mtx);
+        q.push(v);
+    }
+    /// Appends v by moving from it.
+    void push(T&&v)
+    {
+        std::lock_guard<std::mutex> lck(mtx);
+        q.push(std::move(v));
+    }
+    /// Moves the oldest element into v and returns true, or returns false
+    /// and leaves v untouched when the queue is empty.
+    bool pop(T&v)
+    {
+        std::lock_guard<std::mutex> lck(mtx);
+        if(q.empty())return false;
+        v=std::move(q.front());
+        q.pop();
+        return true;
+    }
+    /// Number of queued elements at the moment of the call.
+    size_t size() const
+    {
+        std::lock_guard<std::mutex> lck(mtx);
+        return q.size();
+    }
+private:
+    std::queue<T> q;
+    mutable std::mutex mtx; // mutable so that size() can be const
+};
+
+/**
+ * Fixed-size pool of worker threads fed from a shared task queue.
+ *
+ * Tasks are callables whose first parameter receives the index of the
+ * worker executing them. A pool is single-use: once wait() or terminate()
+ * has returned, no worker is left to run further tasks.
+ */
+class thread_pool
+{
+public:
+    /// Starts njobs workers. Worker i passes i as the first task argument.
+    thread_pool(size_t njobs):wait_interrupt(false),stop(false),waiting_threads(0)
+    {
+        thr.resize(njobs);
+        thstop.resize(njobs);
+        for(size_t i=0;i<njobs;++i)
+        {
+            auto cstop=thstop[i]=std::make_shared<std::atomic<bool>>(false);
+            auto looper=[this,i,cstop]{
+                std::atomic<bool>&stop=*cstop;
+                std::function<void(int)> *f=nullptr;
+                bool popped=wq.pop(f);
+                while(1)
+                {
+                    // Run everything that is currently queued.
+                    for(;popped;popped=wq.pop(f))
+                    {
+                        std::unique_ptr<std::function<void(int)>> pf(f);
+                        (*f)(i);
+                        if(stop)return;
+                    }
+                    // Queue looked empty: sleep until there is a task, or
+                    // until wait()/terminate() asks the worker to leave.
+                    std::unique_lock<std::mutex> lck(mtx);
+                    ++waiting_threads;
+                    cv.wait(lck,[this,&f,&popped,&stop]{
+                        popped=wq.pop(f);
+                        return popped||wait_interrupt||stop;
+                    });
+                    --waiting_threads;
+                    if(!popped)return;
+                }
+            };
+            thr[i].reset(new std::thread(looper));
+        }
+    }
+    /// Finishes the queued work and joins the workers if neither wait() nor
+    /// terminate() was called. Without this, destroying the still-joinable
+    /// std::thread objects would call std::terminate().
+    ~thread_pool()
+    {
+        if(!thr.empty())wait();
+    }
+    /// Queues f(worker_id,args...) and returns a future for its result.
+    template<typename F,typename...A>
+    auto create_task(F&&f,A&&...args)->std::future<decltype(f(0,args...))>
+    {
+        auto task=std::make_shared<std::packaged_task<decltype(f(0,args...))(int)>>(
+            std::bind(std::forward<F>(f),std::placeholders::_1,std::forward<A>(args)...)
+        );
+        // Take the future before the task becomes visible to the workers,
+        // so get_future() never runs concurrently with the task itself.
+        auto fut=task->get_future();
+        std::unique_ptr<std::function<void(int)>> worktask(
+            new std::function<void(int)>([task](int id){(*task)(id);}));
+        std::function<void(int)> *raw=worktask.get();
+        wq.push(raw);
+        worktask.release(); // the queue owns the pointer from here on
+        std::unique_lock<std::mutex> lck(mtx);
+        cv.notify_one();
+        return fut;
+    }
+    /// Lets the workers drain the queue, then joins them.
+    void wait()
+    {
+        if(!stop)wait_interrupt=true;
+        {
+            std::unique_lock<std::mutex> lck(mtx);
+            cv.notify_all();
+        }
+        join_all();
+        drain_queue();
+        thr.clear();thstop.clear();
+    }
+    /// Discards tasks that have not started, lets running tasks finish,
+    /// then joins the workers.
+    void terminate()
+    {
+        stop=true;
+        drain_queue();
+        for(size_t i=0;i<thstop.size();++i)*thstop[i]=true;
+        {
+            std::unique_lock<std::mutex> lck(mtx);
+            cv.notify_all();
+        }
+        join_all();
+        drain_queue();
+        thr.clear();thstop.clear();
+    }
+private:
+    /// Deletes every task still queued. Relies on pop()'s return value
+    /// only: checking size() first races with workers popping concurrently
+    /// and could end up deleting an uninitialised pointer.
+    void drain_queue()
+    {
+        std::function<void(int)> *f=nullptr;
+        while(wq.pop(f))delete f;
+    }
+    /// Joins every worker that is still joinable.
+    void join_all()
+    {
+        for(size_t i=0;i<thr.size();++i)if(thr[i]->joinable())thr[i]->join();
+    }
+    std::vector<std::unique_ptr<std::thread>> thr;
+    std::vector<std::shared_ptr<std::atomic<bool>>> thstop;
+    _atomic_queue<std::function<void(int)>*> wq;
+    std::atomic<bool> wait_interrupt;
+    std::atomic<bool> stop;
+    std::atomic<int> waiting_threads;
+    std::mutex mtx;
+    std::condition_variable cv;
+};
+
+#endif
diff --git a/music/it2midi.cpp b/music/it2midi.cpp
new file mode 100644
index 0000000..ac64bee
--- /dev/null
+++ b/music/it2midi.cpp
@@ -0,0 +1,1105 @@
+/*
+ * Impulse Tracker module file to MIDI converter
+ * Chris Xiong 2017, 2020
+ * License: Expat (MIT)
+ *
+ * Files generated by this application are not meant to be played but to
+ * be placed in a DAW and worked on later.
+ *
+ * General principle:
+ * * One MIDI track for each inst in each channel.
+ *
+ * Process:
+ * parse file -> read patterns -> first pass ->
+ * playback simulation & convert ->
+ * assembly -> post process -> save as MIDI
+ * post process: turn off notes properly, tempo handling etc.
+ *
+ * Default effects mapping:
+ *
+ * Volume column (mostly unimplemented):
+ * value -> MIDI note on velocity
+ * panning -> MIDI CC 10 (pan)
+ * (fine) vol up/down -> MIDI CC 11 (expression)
+ * pitch slide -> MIDI pitch wheel
+ * portamento to -> pitch wheel ~~ or MIDI CC 5 (portamento) (which?)~~
+ * vibrato -> MIDI CC 1 (modulation)
+ *
+ * tempo stuff:
+ * 1 it tick <==> 40 midi ticks, converted file is always 960t/div
+ *
+ * Effect column:
+ * A Set Speed -> handled by per-pattern conversion
+ * B Jump to Ord -> handled by per-pattern conversion and assembler
+ * C Break to Row -> handled by per-pattern conversion
+ * D Sample Volume Slide -> MIDI CC 7 (volume)
+ * E Portamento -> pitch wheel
+ * F Portamento -> E
+ * G Tone Portamento -> pitch wheel or MIDI CC 5 (portamento) (which?)
+ * H Vibrato -> MIDI CC 1 (modulation)
+ * I Tremor -> note on/off
+ * J Arp -> per-pattern conversion
+ * K Vol sld + vib -> D+H
+ * L Vol sld + tp -> D+G
+ * M Channel Vol -> MIDI CC 7 (volume)
+ * N Channel Vol sld -> MIDI CC 7 (volume)
+ * O Sample Offset -> -
+ * P Pan sld -> MIDI CC 10 (pan)
+ * Q Retrigger -> MIDI note on/off
+ * R Tremolo -> -
+ * S Special -> ad hoc
+ * T Tempo -> MIDI tempo
+ * U Fine Vibrato -> H
+ * V Global Vol -> -
+ * W Global Vol Slide -> -
+ * X Set panning -> MIDI CC 10 (pan)
+ * Y Panbrello -> -
+ * Z MIDI Macro -> -
+ *
+ * Bugs:
+ * (nothing listed here, but I'm sure there are more than two dozens of 'em)
+ *
+ * Future features?:
+ * instrument max ticks from IT instrument
+ * option for IT vol -> midi vel/midi expression
+ * implement the rest of the vol/effect column effects
+ * custom controller mapping for each instrument
+ *
+ */
+#include <cstdio>
+#include <cstring>
+#include <cstdint>
+#include <cmath>
+#include <stdexcept>
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+// Set to non-zero to print tone-portamento (Gxx) tracing to stdout.
+const int debuggxx=0;
+// Signature compared against the first four bytes of a module file.
+const char* IMPHdr="IMPM";
+// Reads an unsigned 16-bit little-endian value from f.
+// End of file is not detected: a byte that cannot be read counts as 0xFF.
+uint16_t readSW(FILE* f)
+{
+    const uint16_t lo=static_cast<uint16_t>(fgetc(f))&0xFF;
+    const uint16_t hi=static_cast<uint16_t>(fgetc(f))&0xFF;
+    return static_cast<uint16_t>(lo|(hi<<8));
+}
+// Reads an unsigned 32-bit little-endian value from f.
+// End of file is not detected: a byte that cannot be read counts as 0xFF.
+uint32_t readDW(FILE* f)
+{
+    uint32_t value=0;
+    unsigned shift=0;
+    while(shift<32)
+    {
+        const uint32_t byte=static_cast<uint32_t>(fgetc(f))&0xFF;
+        value|=byte<<shift;
+        shift+=8;
+    }
+    return value;
+}
+// The only part of a sample header this converter keeps: its default volume.
+class ITSample
+{
+    public:
+        int defvol;
+        // Expects f to be positioned at the start of a sample header:
+        // skips 0x13 bytes and stores the following byte in defvol.
+        void readSampleStub(FILE *f)
+        {
+            std::fseek(f,0x13,SEEK_CUR);
+            const int volume=std::fgetc(f);
+            defvol=volume;
+        }
+};
+// The parts of an instrument header the converter needs.
+class ITInstrument
+{
+    public:
+        std::string name;       // instrument name plus " (instr #N)"
+        double aenvmaxt=65536;  // time of the last volume-envelope node, or
+                                // 65536 when no usable end point was found
+        int instrpbrange=24;    // divisor used when mapping pitch offsets to pitch-bend values
+        int sampleref[120]={};  // sample number per table entry; 0 until loaded
+        // Reads the name, the note-to-sample table and the end of the volume
+        // envelope from f, which must be positioned at the start of an
+        // instrument header. If mxtkf is not NULL, one number is read from
+        // it and replaces aenvmaxt. iid is only used to label the name.
+        // NOTE(review): the relative seeks below do not obviously match the
+        // published IT instrument layout (keyboard table at 0x40, volume
+        // envelope at 0x130) -- confirm against ITTECH.TXT.
+        void readInstStub(FILE *f,FILE *mxtkf,int iid)
+        {
+            fseek(f,0x20,SEEK_CUR);
+            char buf[27]={0};
+            fread(buf,1,26,f);
+            name=std::string(buf);
+            name+=" (instr #"+std::to_string(iid)+")";
+            printf("instr: %s\n",buf);
+
+            fseek(f,0x20,SEEK_CUR);//sample references
+            for(int i=0;i<120;++i)
+            {
+                fgetc(f);
+                sampleref[i]=fgetc(f);
+            }
+            fseek(f,0x0F0,SEEK_CUR);//amp(vol) envelope
+            int flg=fgetc(f);
+            bool useenv=flg&1;
+            bool decloop=flg&2;
+            if(!useenv||decloop)aenvmaxt=65536;
+            else
+            {
+                int n=fgetc(f);
+                readDW(f);
+                for(int i=0;i<n-1;++i)
+                {
+                    // Read into locals first: the evaluation order of
+                    // function arguments is unspecified, so reading from
+                    // the file inside the printf argument list could fetch
+                    // the two fields in the wrong order.
+                    const int nodey=fgetc(f);
+                    const int nodet=readSW(f);
+                    printf("env node: %d %d\n",nodey,nodet);
+                }
+                uint8_t lenvy=fgetc(f);
+                uint16_t lenvt=readSW(f);
+                printf("final node: %d %d\n",lenvy,lenvt);
+                aenvmaxt=(n&&lenvy<32)?lenvt:65536;
+            }
+            if(mxtkf)fscanf(mxtkf,"%lf",&aenvmaxt);
+        }
+};
+class ITContainer;
+class ITPattern;
+// One pattern cell: mask byte, note, instrument, volume column, and
+// effect command with its parameter. A new cell is all zeroes.
+struct ITCell
+{
+    uint8_t mask=0,note=0,inst=0,vol=0,efx=0,fxp=0;
+    ITCell(){}
+};
+// One pattern row: 64 cells, one per channel. Only ITPattern can create rows.
+class ITRow
+{
+    std::vector<ITCell> cells;
+    private:
+        // Reads one packed row from c's file; defined below.
+        ITRow(ITContainer *c,ITPattern *pat);
+    public:
+        // Cell of channel s. Throws std::out_of_range for an invalid
+        // channel; the previous version ran off the end of the function
+        // without returning a reference, which is undefined behaviour.
+        ITCell& operator [](size_t s){return cells.at(s);}
+        friend class ITPattern;
+};
+// One pattern: its rows, plus the per-channel "last value" memory used
+// while the packed rows are being read.
+class ITPattern
+{
+    private:
+        std::vector<ITRow> rows;
+        uint16_t len,nrows;          // packed length in bytes and row count from the pattern header
+        std::vector<ITCell> lastval; // most recent mask/note/inst/vol/effect seen per channel
+    public:
+        // Loads pattern p from c's file; defined below.
+        ITPattern(ITContainer *c,int p);
+        // Row s. Throws std::out_of_range for an invalid row; the previous
+        // version ran off the end of the function without returning a
+        // reference, which is undefined behaviour.
+        ITRow& operator [](size_t s){return rows.at(s);}
+        size_t rowCount(){return rows.size();}
+        void dumpPattern();
+        friend class ITRow;
+};
+// A loaded module: header fields, order list, instrument and sample stubs,
+// and all patterns. The constructor does all the work and throws
+// std::runtime_error when the file cannot be opened or has a bad header.
+class ITContainer
+{
+    friend class ITRow;
+    friend class ITPattern;
+    friend class ITPlayer;
+    friend class ITConverter;
+    private:
+        FILE *f=nullptr,*imf=nullptr;
+        uint16_t cord,cins,csmp,cpat;
+        uint16_t crv,cmv,flag,flagsp;
+        uint8_t initspeed,inittempo;
+        uint16_t msgl;
+        uint32_t msgp;
+        uint8_t chp[64],chv[64];
+        uint8_t *ord=nullptr;
+        uint32_t *ppat=nullptr;
+        uint32_t *pins=nullptr;
+        uint32_t *psmp=nullptr;
+        std::string title;
+        std::vector<ITInstrument> instr;
+        std::vector<ITSample> sample;
+        std::vector<ITPattern> patterns;
+        // Frees the tables and closes both files. Safe to call repeatedly.
+        void release()
+        {
+            delete[] ord;ord=nullptr;
+            delete[] ppat;ppat=nullptr;
+            delete[] pins;pins=nullptr;
+            delete[] psmp;psmp=nullptr;
+            if(f){fclose(f);f=nullptr;}
+            if(imf){fclose(imf);imf=nullptr;}
+        }
+        // Fills an offset table with little-endian 32-bit values. Reading
+        // value by value keeps the result independent of host byte order.
+        uint32_t* readOffsets(uint16_t count)
+        {
+            uint32_t *tbl=new uint32_t[count];
+            for(uint16_t i=0;i<count;++i)tbl[i]=readDW(f);
+            return tbl;
+        }
+        // Parses the already opened file f.
+        void load()
+        {
+            char buf[32]={0};
+            if(fread(buf,1,4,f)!=4)
+                throw std::runtime_error("unexpected EOF");
+            if(strncmp(buf,IMPHdr,4))
+                throw std::runtime_error("wrong impulse header");
+            if(fread(buf,1,26,f)!=26)
+                throw std::runtime_error("unexpected EOF");
+            buf[26]=0;//the name field is not guaranteed to be terminated
+            printf("song name: %s\n",buf);
+            title=std::string(buf);
+            fseek(f,2,SEEK_CUR);//PHiligt
+            cord=readSW(f);cins=readSW(f);csmp=readSW(f);cpat=readSW(f);
+            crv=readSW(f);cmv=readSW(f);flag=readSW(f);flagsp=readSW(f);
+            printf("created with impulse tracker %x.%02x\n",
+                crv>>8,crv&0xff);
+            printf("format version: %x.%02x\n",cmv>>8,cmv&0xff);
+            fseek(f,2,SEEK_CUR);//GV,MV
+            initspeed=fgetc(f);
+            inittempo=fgetc(f);
+            printf("init speed/tempo: %d %d\n",initspeed,inittempo);
+            fseek(f,2,SEEK_CUR);//Sep,PWD
+            msgl=readSW(f);msgp=readDW(f);
+            fseek(f,4,SEEK_CUR);//Reserved
+            fread(chp,1,64,f);
+            fread(chv,1,64,f);
+            ord=new uint8_t[cord];
+            fread(ord,1,cord,f);
+            pins=readOffsets(cins);
+            psmp=readOffsets(csmp);
+            ppat=readOffsets(cpat);
+            fseek(f,msgp,SEEK_SET);
+            std::string msg(msgl,'\0');
+            if(msgl)fread(&msg[0],1,msgl,f);
+            for(char &ch:msg)if(ch==13)ch=10;
+            if(flagsp&1)
+                printf("song message:\n%s\n\n",msg.c_str());
+            printf("file containing max length of instruments?(press return for none)\n");
+            char pth[1024];
+            if(!fgets(pth,1024,stdin))pth[0]=0;//EOF on stdin counts as "none"
+            pth[strcspn(pth,"\r\n")]=0;//strip the line ending, if there is one
+            if(strlen(pth))imf=fopen(pth,"r");else imf=NULL;
+            instr.push_back(ITInstrument());//index 0 is a placeholder: instrument numbers start at 1
+            for(uint16_t i=0;i<cins;++i)
+            {
+                fseek(f,pins[i],SEEK_SET);
+                instr.push_back(ITInstrument());
+                instr.back().readInstStub(f,imf,i+1);
+                printf("instr %d: %s\n",i+1,instr.back().name.c_str());
+            }
+            for(uint16_t i=0;i<csmp;++i)
+            {
+                fseek(f,psmp[i],SEEK_SET);
+                sample.push_back(ITSample());
+                sample.back().readSampleStub(f);
+            }
+            for(uint16_t i=0;i<cpat;++i)
+            {
+                patterns.push_back(ITPattern(this,i));
+                //patterns.back().dumpPattern();
+            }
+        }
+    public:
+        ITContainer(const char* path)
+        {
+            printf("loading file %s...\n",path);
+            f=fopen(path,"rb");
+            if(!f)
+                throw std::runtime_error("cannot open file");
+            // The destructor does not run when a constructor throws, so
+            // clean up by hand before passing the exception on.
+            try{load();}
+            catch(...){release();throw;}
+        }
+        // The object owns raw buffers and FILE handles: copying it would
+        // free and close them twice.
+        ITContainer(const ITContainer&)=delete;
+        ITContainer& operator=(const ITContainer&)=delete;
+        ~ITContainer()
+        {
+            release();
+        }
+};
+// Reads one packed row from c's file, up to and including its terminating
+// zero byte. Each non-zero channel marker selects channel (marker-1)&0x3f.
+// If bit 7 of the marker is set a new mask byte follows, otherwise the
+// channel's previous mask is reused. Mask bits 0x01/0x02/0x04/0x08 mean the
+// note/instrument/volume/effect are read from the file (and remembered in
+// pat->lastval); bits 0x10/0x20/0x40/0x80 mean the remembered value is used.
+// Throws std::runtime_error if the file ends inside the row.
+ITRow::ITRow(ITContainer *c,ITPattern *pat)
+{
+    FILE* const fp=c->f;
+    // Byte reader that reports end of file. Without the check EOF becomes
+    // the marker byte 0xFF and the loop below never terminates.
+    auto next=[fp]()->uint8_t
+    {
+        const int b=fgetc(fp);
+        if(b==EOF)throw std::runtime_error("unexpected EOF in pattern data");
+        return static_cast<uint8_t>(b);
+    };
+    cells.resize(64);
+    for(uint8_t chmarker;(chmarker=next())!=0;)
+    {
+        uint8_t ch=(chmarker-1)&0x3f;
+        if(chmarker>>7)
+            pat->lastval[ch].mask=cells[ch].mask=next();
+        else
+            cells[ch].mask=pat->lastval[ch].mask;
+        if(cells[ch].mask&0x01)
+            pat->lastval[ch].note=cells[ch].note=next();
+        if(cells[ch].mask&0x02)
+            pat->lastval[ch].inst=cells[ch].inst=next();
+        if(cells[ch].mask&0x04)
+            pat->lastval[ch].vol=cells[ch].vol=next();
+        if(cells[ch].mask&0x08)
+        {
+            pat->lastval[ch].efx=cells[ch].efx=next();
+            pat->lastval[ch].fxp=cells[ch].fxp=next();
+        }
+        if(cells[ch].mask&0x10)
+            cells[ch].note=pat->lastval[ch].note;
+        if(cells[ch].mask&0x20)
+            cells[ch].inst=pat->lastval[ch].inst;
+        if(cells[ch].mask&0x40)
+            cells[ch].vol=pat->lastval[ch].vol;
+        if(cells[ch].mask&0x80)
+        {
+            cells[ch].efx=pat->lastval[ch].efx;
+            cells[ch].fxp=pat->lastval[ch].fxp;
+        }
+    }
+}
+// Loads pattern p: seeks to c->ppat[p], reads the header (packed length,
+// row count, four unused bytes) and then nrows packed rows. Throws
+// std::runtime_error if the bytes consumed by the rows differ from the
+// length announced in the header.
+ITPattern::ITPattern(ITContainer *c,int p)
+{
+    printf("loading pattern %d\n",p);
+    fseek(c->f,c->ppat[p],SEEK_SET);
+    len=readSW(c->f);
+    nrows=readSW(c->f);
+    readDW(c->f);
+    long pos=ftell(c->f);
+    lastval.resize(64);
+    rows.reserve(nrows);
+    for(size_t i=0;i<nrows;++i)
+        rows.push_back(ITRow(c,this));
+    const long consumed=ftell(c->f)-pos;
+    if(consumed!=len)
+    {
+        // consumed is a long: print it with %ld (it used to be passed to %u).
+        printf("length mismatch: %ld<->%u\n",consumed,(unsigned)len);
+        throw std::runtime_error("length mismatch");
+    }
+}
+// Prints the pattern to stdout, one line per row with every channel's
+// note, instrument, volume and effect columns.
+void ITPattern::dumpPattern()
+{
+    static const char* const noteNames="C-C#D-D#E-F-F#G-G#A-A#B-";
+    for(size_t r=0;r<nrows;++r)
+    {
+        printf("|");
+        for(int ch=0;ch<64;++ch)
+        {
+            const ITCell& cell=rows[r].cells[ch];
+            const uint8_t note=cell.note;
+            // note column: empty, one of three special markers, or name+octave
+            if(!note)
+                printf("... ");
+            else if(note==0xff)
+                printf("== ");
+            else if(note==0xfe)
+                printf("^^ ");
+            else if(note>=120)
+                printf("~~ ");
+            else
+                printf("%2.2s%1d ",noteNames+(note%12)*2,note/12);
+            // instrument column
+            if(cell.inst)
+                printf("%02d ",cell.inst);
+            else
+                printf(".. ");
+            // volume column
+            if(cell.vol)
+                printf("%03d ",cell.vol);
+            else
+                printf("... ");
+            // effect column: command letter (1 -> 'A') and hex parameter
+            if(cell.efx)
+                printf("%c%02x|",'A'-1+cell.efx,cell.fxp);
+            else
+                printf("...|");
+        }
+        puts("");
+    }
+}
+// Callback interface driven by ITPlayer. For every row, action() is called
+// once per channel (ch = 0..63) with that cell's fields, and then once more
+// with ch = 0xFF and all other arguments zero to mark the end of the row.
+// The mask ITPlayer passes is the cell's mask with its high nibble also
+// OR-ed into the low nibble.
+class ITCellAction
+{
+ public:
+ virtual void action(
+ uint8_t ch,
+ uint8_t mask,
+ uint8_t note,
+ uint8_t inst,
+ uint8_t vol,
+ uint8_t efx,
+ uint8_t fxp
+ )=0;
+ virtual ~ITCellAction(){}
+};
+// Walks the order list of a module and feeds every cell to an ITCellAction.
+// The whole walk happens inside the constructor. The action may call
+// skipPattern() to request a jump to another order and/or a start row.
+class ITPlayer
+{
+    private:
+        ITContainer& c;
+        ITCellAction* a; // may be NULL: the walk then produces no callbacks
+        uint8_t pmsk[64],pnt[64],pinst[64],pvol[64],pefx[64],pfxp[64];
+        int skpord,skprow; // pending jump targets, -1 when none
+        // Plays pattern `pat` from row `_skprow` until it ends or a jump
+        // is requested by the action.
+        void processPattern(size_t pat,uint16_t _skprow=0)
+        {
+            printf("process pattern %u\n",(unsigned)pat);
+            skprow=-1;
+            // The pattern number comes straight from the file: ignore
+            // order entries that name a pattern the module does not have.
+            if(pat>=c.patterns.size())return;
+            ITPattern& pattern=c.patterns[pat];
+            for(uint16_t i=_skprow;i<pattern.rowCount();++i)
+            {
+                if(a)
+                {
+                    for(uint8_t ch=0;ch<64;++ch)
+                    {
+                        ITCell& cell=pattern[i][ch];
+                        uint8_t msk=cell.mask;
+                        a->action(ch,msk>>4|msk,cell.note,cell.inst,cell.vol,cell.efx,cell.fxp);
+                    }
+                    // End-of-row marker. This call used to be made even
+                    // when no action was installed (a==NULL).
+                    a->action(0xFF,0,0,0,0,0,0);
+                }
+                if(~skpord||~skprow)return;
+            }
+        }
+    public:
+        ITPlayer(ITContainer &_c,ITCellAction *_a=NULL):c(_c),a(_a)
+        {
+            skpord=skprow=-1;
+            for(uint16_t i=0;i<64;++i)pmsk[i]=pnt[i]=pinst[i]=pvol[i]=pefx[i]=pfxp[i]=0;
+            for(uint16_t i=0;i<c.cord;++i)
+            {
+                if(c.ord[i]==255)break;//order value 255 stops the walk
+                if(c.ord[i]==254)continue;//order value 254 is skipped
+                processPattern(c.ord[i],~skprow?skprow:0);
+                if(~skpord)
+                {
+                    // Only forward jumps are followed; a jump to the
+                    // current or an earlier order is just reported.
+                    if(skpord<=i)puts("loop?");
+                    else i=skpord-1;
+                    skpord=-1;
+                }
+            }
+        }
+        // Requests a jump: order `ord` and/or start row `row` for the next
+        // pattern. An argument of -1 leaves that target unchanged.
+        void skipPattern(int ord=-1,int row=-1)
+        {
+            if(~ord)skpord=ord;
+            if(~row)skprow=row;
+        }
+};
+// One MIDI event. For channel messages `type` is the status byte and p1/p2
+// are the data bytes. For meta events type is 0xFF, p1 is the meta type,
+// p2 is the payload length and `str` holds the payload.
+struct MidiEvent
+{
+    uint32_t time,type,p1,p2;
+    std::string str;
+    // s, when given, must be a NUL-terminated string.
+    MidiEvent(uint32_t _t,uint32_t _tp,uint32_t _p1,uint32_t _p2,const char* s=NULL)
+    {
+        time=_t;type=_tp;p1=_p1;p2=_p2;
+        if(s)str=std::string(s);else str="";
+    }
+    // Note off. With the default velocity it is encoded as a note on with
+    // velocity 0, otherwise as a real note-off message.
+    static MidiEvent noteOff(uint32_t t,uint32_t ch,uint32_t note,uint32_t vel=0x40)
+    {
+        if(vel==0x40)return MidiEvent(t,0x90|ch,note,0);
+        return MidiEvent(t,0x80|ch,note,vel);
+    }
+    static MidiEvent noteOn(uint32_t t,uint32_t ch,uint32_t note,uint32_t vel)
+    {
+        return MidiEvent(t,0x90|ch,note,vel);
+    }
+    // Control change.
+    static MidiEvent cc(uint32_t t,uint32_t ch,uint32_t cc,uint32_t val)
+    {
+        return MidiEvent(t,0xB0|ch,cc,val);
+    }
+    // Program change.
+    static MidiEvent pc(uint32_t t,uint32_t ch,uint32_t p)
+    {
+        return MidiEvent(t,0xC0|ch,p,0);
+    }
+    // Pitch bend; val is clamped to 16383 and split into two 7-bit bytes.
+    static MidiEvent pb(uint32_t t,uint32_t ch,uint32_t val)
+    {
+        if(val>16383)val=16383;
+        return MidiEvent(t,0xE0|ch,val&0x7F,val>>7);
+    }
+    // Tempo meta event: tmpo in beats per minute, stored as three
+    // big-endian bytes of microseconds per beat.
+    static MidiEvent tempo(uint32_t t,double tmpo)
+    {
+        int us=60000000./tmpo;
+        const char c[3]={(char)(us>>16&0xFF),(char)(us>>8&0xFF),(char)(us&0xFF)};
+        MidiEvent ev(t,0xFF,0x51,0x03);
+        // The payload is binary: copy it with an explicit length. Going
+        // through the C-string constructor cut it at the first zero byte.
+        ev.str.assign(c,sizeof c);
+        return ev;
+    }
+    // Time signature meta event: numerator n, denominator 2^pot_d.
+    static MidiEvent tsig(uint32_t t,uint32_t n,uint32_t pot_d)
+    {
+        const char c[4]={(char)n,(char)pot_d,24,8};
+        MidiEvent ev(t,0xFF,0x58,0x04);
+        ev.str.assign(c,sizeof c);//binary payload, see tempo()
+        return ev;
+    }
+    // Track name meta event.
+    static MidiEvent trkname(uint32_t t,const char* s)
+    {
+        return MidiEvent(t,0xFF,0x03,strlen(s),s);
+    }
+};
+// One MIDI track: simply the list of its events. Events may be appended in
+// any order; MidiFile::writeFile stable-sorts each list by time before writing.
+struct MidiTrack
+{
+ std::vector<MidiEvent> eventList;
+};
+// Collects tracks and writes them out as a format-1 Standard MIDI File.
+class MidiFile
+{
+    private:
+        FILE* f=NULL; // only valid while writeFile() is running
+        // 32-bit big-endian write.
+        void writeDWBE(uint32_t v,FILE* f)
+        {
+            for(int i=3;i>=0;--i)
+                fputc(v>>(i<<3)&0xFF,f);
+        }
+        // 16-bit big-endian write.
+        void writeSWBE(uint16_t v,FILE* f)
+        {
+            for(int i=1;i>=0;--i)
+                fputc(v>>(i<<3)&0xFF,f);
+        }
+        // Appends v to d as a MIDI variable-length quantity (7 bits per
+        // byte, high bit set on all but the last byte). Throws
+        // std::runtime_error for values that need more than four bytes.
+        void dumpVL(uint32_t v,std::vector<uint8_t> &d)
+        {
+            if(v>0x0FFFFFFF)throw std::runtime_error("VL overflow");
+            uint32_t sh=4*7;
+            while(sh&&!(v>>sh))sh-=7;
+            for(;sh>0;sh-=7)d.push_back(((v>>sh)&0x7F)|0x80);
+            d.push_back(v&0x7f);
+        }
+        // Writes one MTrk chunk. Events must already be sorted by time.
+        void writeTrack(const MidiTrack &tr)
+        {
+            std::vector<uint8_t> buf;
+            fputs("MTrk",f);
+            uint32_t curt=0,lastst=0;
+            for(const MidiEvent &j:tr.eventList)
+            {
+                dumpVL(j.time-curt,buf);
+                curt=j.time;
+                if(j.type<0xF0)
+                {
+                    // running status: repeat the status byte only when it changes
+                    if(lastst!=j.type)
+                        buf.push_back(lastst=j.type);
+                    buf.push_back(j.p1);
+                    // program change and channel pressure carry one data byte
+                    if((j.type&0xF0)!=0xC0&&(j.type&0xF0)!=0xD0)
+                        buf.push_back(j.p2);
+                }
+                else
+                {
+                    buf.push_back(lastst=j.type);
+                    buf.push_back(j.p1);
+                    dumpVL(j.p2,buf);
+                    // Never read past the stored payload: pad with zeroes
+                    // if it is shorter than the announced length.
+                    for(uint32_t i=0;i<j.p2;++i)
+                        buf.push_back(i<j.str.size()?j.str[i]:0);
+                }
+            }
+            // end-of-track meta event
+            dumpVL(0,buf);buf.push_back(0xFF);buf.push_back(0x2F);buf.push_back(0);
+            writeDWBE(buf.size(),f);
+            fwrite(buf.data(),1,buf.size(),f);
+        }
+    public:
+        uint16_t divs; // time division written to the file header
+        std::vector<MidiTrack> tracks;
+        // Prints the events of the selected tracks (all tracks when
+        // tracksw is empty). Throws std::out_of_range for a bad index.
+        void dumpFile(std::vector<size_t> tracksw={})
+        {
+            puts("Midi File dump");
+            if(tracksw.empty())
+                for(size_t i=0;i<tracks.size();++i)tracksw.push_back(i);
+            for(size_t& ii:tracksw)
+            {
+                MidiTrack &i=tracks.at(ii);
+                puts("==============track==============");
+                for(MidiEvent &j:i.eventList)
+                    if(j.str.length())
+                        printf("type %x @%x p1 %x p2 %x str %s\n",j.type,
+                            j.time,j.p1,j.p2,j.str.c_str());
+                    else
+                        printf("type %x @%x p1 %x p2 %x\n",j.type,
+                            j.time,j.p1,j.p2);
+            }
+        }
+        // Sorts every track by event time and writes the selected tracks
+        // (all tracks when tracksw is empty) to `path`. Throws
+        // std::runtime_error if the file cannot be created or a delta time
+        // is too large, and std::out_of_range for a bad track index.
+        void writeFile(const char* path,std::vector<size_t> tracksw={})
+        {
+            for(MidiTrack &i:tracks)
+                std::stable_sort(i.eventList.begin(),i.eventList.end(),
+                    [](const MidiEvent& a,const MidiEvent& b)->bool{
+                        return a.time<b.time;
+                    }
+                );
+            f=fopen(path,"wb");
+            if(!f)throw std::runtime_error("cannot open output file");
+            try
+            {
+                fputs("MThd",f);
+                writeDWBE(6,f);
+                writeSWBE(1,f);
+                if(tracksw.size())
+                    writeSWBE(tracksw.size(),f);
+                else
+                    writeSWBE(tracks.size(),f);
+                writeSWBE(divs,f);
+                if(tracksw.size())
+                    for(auto& i:tracksw)writeTrack(tracks.at(i));
+                else
+                    for(MidiTrack &i:tracks)writeTrack(i);
+            }
+            catch(...)
+            {
+                // do not leak the handle when a track cannot be written
+                fclose(f);f=NULL;
+                throw;
+            }
+            fclose(f);f=NULL;
+        }
+};
+class ITConverter
+{
+ friend class ITCellActionPre;
+ friend class ITCellActionConv;
+ private:
+ ITContainer& c;
+ std::map<std::pair<uint8_t,uint8_t>,int> minstch;
+ MidiFile f;
+ ITPlayer *p;
+ uint8_t speed,tempo;
+ uint8_t chnote[64],chinst[64],chvol[64],chefx[64],chfxp[64];
+ uint8_t chvelm[64],chvolm[64],chpanm[64];
+ uint8_t chefxmem[64][32]={0};
+ uint8_t chefxflg[64][32]={0};
+ int chportasrcnote[64]={0},chportadstnote[64]={0};
+ double chpitchm[64]={0};
+ double chage[64];
+ uint32_t currow,curmiditk;
+ // Action that only collects which (channel, instrument) pairs occur: each
+ // pair is entered into par->minstch with the value 0. It also keeps
+ // par->chinst[ch] up to date so a note without an instrument is counted
+ // for the channel's current one.
+ // NOTE(review): presumably the "first pass" named in the file header --
+ // the code that runs it is outside this excerpt.
+ class ITCellActionPre:public ITCellAction
+ {
+ private:
+ ITConverter* par;
+ public:
+ ITCellActionPre(ITConverter *_p):par(_p){}
+ void action(
+ uint8_t ch,
+ uint8_t mask,
+ uint8_t note,
+ uint8_t inst,
+ uint8_t vol,
+ uint8_t efx,
+ uint8_t fxp
+ )
+ {
+ if(mask>>1&1)
+ {
+ if(inst)par->chinst[ch]=inst;
+ // the pair is registered even when inst is 0
+ par->minstch[std::make_pair(ch,inst)]=0;
+ }
+ if(mask>>0&1)//note
+ {
+ // a note without an instrument uses the channel's current one
+ if(!inst)inst=par->chinst[ch];
+ // notes >= 120 are not registered
+ if(note<120)par->minstch[std::make_pair(ch,inst)]=0;
+ }
+ }
+ };
+ class ITCellActionConv:public ITCellAction
+ {
+ private:
+ ITConverter* par;
+ public:
+ ITCellActionConv(ITConverter *_p):par(_p){}
+ void action(
+ uint8_t ch,
+ uint8_t mask,
+ uint8_t note,
+ uint8_t inst,
+ uint8_t vol,
+ uint8_t efx,
+ uint8_t fxp
+ )
+ {
+ if(ch==0xFF)
+ {
+ ++par->currow;
+ par->curmiditk+=40*par->speed;
+ for(uint8_t i=0;i<64;++i)
+ if(par->chnote[i]!=255)
+ {
+ par->chage[i]+=2.5/par->tempo*par->speed;
+ if(par->c.instr[par->chinst[i]].aenvmaxt<par->chage[i])
+ {
+ par->f.tracks[par->minstch[std::make_pair(i,par->chinst[i])]].
+ eventList.push_back(MidiEvent::noteOff(par->curmiditk,0,par->chnote[i]));
+ par->chnote[i]=255;
+ }
+ }
+ return;
+ }
+ uint8_t previnst=par->chinst[ch];
+ if(mask>>1&1)//inst
+ {
+ if(inst)par->chinst[ch]=inst;
+ }
+ if(mask>>2&1)//vol
+ {
+ if(vol<=64)par->chvelm[ch]=(vol==64?127:2*vol);
+ if(par->chvelm[ch]==0)par->chvelm[ch]=1;
+ else if(vol>=128&&vol<=192)
+ {
+ par->chpanm[ch]=(vol==192?127:(vol-128)*2);
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::cc(par->curmiditk,0,10,par->chpanm[ch]));
+ }
+ }
+ else
+ {
+ //default volume
+ uint32_t curnote=par->chnote[ch];
+ if(mask>>0&1)curnote=note;
+ if(curnote<120 && curnote>=12)
+ par->chvelm[ch]=par->c.sample[par->c.instr[par->chinst[ch]].sampleref[curnote-12]-1].defvol*2;
+ if(par->chvelm[ch]>127)
+ par->chvelm[ch]=127;
+ if(par->chvelm[ch]==0)
+ {
+ //printf("!!!!inst %d samp %d note %d def vol %d\n",par->chinst[ch],par->c.instr[par->chinst[ch]].sampleref[curnote]-1,curnote,par->c.sample[par->c.instr[par->chinst[ch]].sampleref[curnote]-1].defvol*2);
+ par->chvelm[ch]=1;
+ }
+ }
+ if((mask>>0&1)&&note<120&&!((mask>>3&1)&&efx==7))//reset for pitch slides (E/F, but not G)
+ {
+ if(fabs(par->chpitchm[ch])>1e-6)
+ {
+ par->chpitchm[ch]=0;
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::pb(par->curmiditk,0,8192));
+ }
+ }
+ int notedelay=0;
+ if(mask>>3&1)//efx
+ {
+ switch(efx)
+ {
+ case 1://speed
+ if(fxp)par->speed=fxp;
+ break;
+ case 2://jump
+ par->p->skipPattern(fxp);
+ break;
+ case 3://break
+ par->p->skipPattern(-1,fxp);
+ break;
+ case 4://vol slide
+ if(fxp)
+ par->chefxmem[ch][efx]=fxp;
+ else
+ fxp=par->chefxmem[ch][efx];
+ //unimplemented
+ break;
+ case 5://pitch down
+ if(fxp)
+ par->chefxmem[ch][efx]=fxp;
+ else
+ fxp=par->chefxmem[ch][efx];
+ {
+ double granularity=fxp/16.;
+ if(fxp>=0xe0)
+ granularity=(fxp&0x0f)/(fxp>=0xf0?16.:64.);
+ int runfor=fxp>=0xe0?1:par->speed;
+ for(int tk=0;tk<runfor;++tk)
+ {
+ par->chpitchm[ch]-=granularity;
+ double pbv=par->chpitchm[ch]/par->c.instr[par->chinst[ch]].instrpbrange;
+ if(pbv>1)pbv=1;
+ if(pbv<-1)pbv=-1;
+ uint32_t pbvi=int(pbv*8192)+8192;
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::pb(40*tk+par->curmiditk,0,pbvi));
+ }
+ }
+ break;
+ case 6://pitch up
+ {
+ if(fxp)
+ par->chefxmem[ch][efx]=fxp;
+ else
+ fxp=par->chefxmem[ch][efx];
+ {
+ double granularity=fxp/16.;
+ if(fxp>=0xe0)
+ granularity=(fxp&0x0f)/(fxp>=0xf0?16.:64.);
+ int runfor=fxp>=0xe0?1:par->speed;
+ for(int tk=0;tk<runfor;++tk)
+ {
+ par->chpitchm[ch]+=granularity;
+ double pbv=par->chpitchm[ch]/par->c.instr[par->chinst[ch]].instrpbrange;
+ if(pbv>1)pbv=1;
+ if(pbv<-1)pbv=-1;
+ uint32_t pbvi=int(pbv*8192)+8192;
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::pb(40*tk+par->curmiditk,0,pbvi));
+ }
+ }
+ }
+ break;
+ case 7://porta
+ {
+ if(fxp)
+ {
+ par->chefxmem[ch][efx]=fxp;
+ //if(!(mask>>0&1)||note>=120)
+ // puts("no note to slide to?");
+ if(par->chnote[ch]>=120 || previnst != par->chinst[ch])//Gxx without a prior note doesn't seem to have any effect? Also doesn't carry through inst changes?
+ {
+ efx=par->chefxmem[ch][efx]=0;
+ break;
+ }
+ par->chportasrcnote[ch]=par->chnote[ch];
+ if(mask&1)
+ par->chportadstnote[ch]=note;
+ }
+ else
+ {
+ fxp=par->chefxmem[ch][efx];
+ if(mask&1)
+ par->chportadstnote[ch]=note;
+ }
+ if(debuggxx)printf("src %d dst %d\n", par->chportasrcnote[ch], par->chportadstnote[ch]);
+ for(int tk=0;tk<par->speed;++tk)
+ {
+ if(fabs(par->chpitchm[ch]+par->chportasrcnote[ch]-par->chportadstnote[ch])<1e-6)
+ break;
+ if(debuggxx)printf("pitch %f\n",par->chpitchm[ch]);
+ int portadir=((par->chpitchm[ch]+par->chportasrcnote[ch])>par->chportadstnote[ch])?-1:1;
+ double nextpitchm=par->chpitchm[ch]+1.*portadir*fxp/16.;
+ int nextportadir=((nextpitchm+par->chportasrcnote[ch])>par->chportadstnote[ch])?-1:1;
+ if(nextportadir*portadir<0)//detect overshoot
+ par->chpitchm[ch]=par->chportadstnote[ch]-par->chportasrcnote[ch];
+ else
+ par->chpitchm[ch]+=1.*portadir*fxp/16.;
+ double pbv=par->chpitchm[ch]/par->c.instr[par->chinst[ch]].instrpbrange;
+ if(pbv>1)pbv=1;
+ if(pbv<-1)pbv=-1;
+ uint32_t pbvi=int(pbv*8192)+8192;
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::pb(40*tk+par->curmiditk,0,pbvi));
+ }
+ }
+ break;
+ case 8://vib
+ par->chefxflg[ch][efx]=1;
+ if(fxp)
+ par->chefxmem[ch][efx]=fxp;
+ else
+ {
+ //fxp=par->chefxmem[ch][efx];
+ //actually midi has us covered already,
+ //no need to save the memory manually.
+ break;
+ }
+ //vibrato rate is currently ignored, even it is supported on some synths as CC76
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::cc(par->curmiditk,0,1,(fxp&0x0f)*8));
+ break;
+ case 9://tremor
+ if(fxp)
+ par->chefxmem[ch][efx]=fxp;
+ else
+ fxp=par->chefxmem[ch][efx];
+ break;
+ //unimplemented
+ case 10://arp
+ {
+ if(fxp)
+ par->chefxmem[ch][efx]=fxp;
+ else
+ fxp=par->chefxmem[ch][efx];
+ uint32_t curnote=par->chnote[ch];
+ if(mask>>0&1)curnote=note;
+ if(curnote<120)
+ {
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::noteOff(par->curmiditk+40*par->speed/3,0,curnote));
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::noteOn(par->curmiditk+40*par->speed/3,0,curnote+(fxp>>4),par->chvelm[ch]));
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::noteOff(par->curmiditk+80*par->speed/3,0,curnote+(fxp>>4)));
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::noteOn(par->curmiditk+80*par->speed/3,0,curnote+(fxp&0x0F),par->chvelm[ch]));
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::noteOff(par->curmiditk+120*par->speed/3,0,curnote+(fxp&0x0F)));
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::noteOn(par->curmiditk+120*par->speed/3,0,curnote,par->chvelm[ch]));
+ }
+ }
+ break;
+ case 17://Retrigger
+ {
+ if(fxp)
+ par->chefxmem[ch][efx]=fxp;
+ else
+ fxp=par->chefxmem[ch][efx];
+ std::vector<std::function<int(int)>> retrigmod = {
+ [](int a){return a;},
+ [](int a){return a-1>0?a-1:0;},
+ [](int a){return a-2>0?a-2:0;},
+ [](int a){return a-4>0?a-4:0;},
+ [](int a){return a-8>0?a-8:0;},
+ [](int a){return a-16>0?a-16:0;},
+ [](int a){return a*2/3;},
+ [](int a){return a/2;},
+ [](int a){return a;},
+ [](int a){return a+1>63?63:a+1;},
+ [](int a){return a+2>63?63:a+2;},
+ [](int a){return a+4>63?63:a+4;},
+ [](int a){return a+8>63?63:a+8;},
+ [](int a){return a+16>63?63:a+16;},
+ [](int a){return a*3/2>63?63:a*3/2;},
+ [](int a){return a*2>63?63:a*2;},
+ };
+ int tspan = fxp & 0xF;
+ int vmod = fxp >> 4;
+ int ctk = 0;
+ int vel = par->chvelm[ch];
+ uint32_t curnote=par->chnote[ch];
+ if (tspan==0) break; //can't handle that!
+ if (curnote==255) curnote=note;
+ auto &eventList = par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].eventList;
+
+ while (ctk + tspan < par->speed)
+ {
+ eventList.push_back(MidiEvent::noteOff(par->curmiditk+40*ctk,0,curnote));
+ vel = retrigmod[vmod](vel);
+ eventList.push_back(MidiEvent::noteOn(par->curmiditk+40*ctk,0,curnote, vel));
+ ctk += tspan;
+ }
+ eventList.push_back(MidiEvent::noteOff(par->curmiditk+40*ctk,0,curnote));
+ }
+ break;
+ case 19://Sxx controls
+ switch(fxp&0xf0)
+ {
+ case 0xc0://note cut
+ {
+ uint8_t param=fxp&0x0f;
+ if(!param)param=1;
+ uint32_t curnote=par->chnote[ch];
+ if(mask>>0&1)curnote=note;
+ if(curnote<120)
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::noteOff(par->curmiditk+40*param,0,curnote));
+ }
+ break;
+ case 0xd0://note delay
+ {
+ uint8_t param=fxp&0x0f;
+ if(!param)param=1;
+ notedelay=40*param;
+ }
+ break;
+ }
+ break;
+ case 20://tempo
+ {
+ static uint8_t tempop=0;
+ if(fxp>0x20)
+ {
+ par->tempo=fxp;
+ par->f.tracks[0].eventList.push_back(MidiEvent::tempo(par->curmiditk,fxp));
+ }
+ else
+ {
+ if(fxp==0x00)fxp=tempop;
+ if(fxp>0x10)
+ {
+ tempop=fxp;
+ for(int i=1;i<par->speed;++i)
+ par->f.tracks[0].eventList.push_back(MidiEvent::tempo(par->curmiditk+i*40,par->tempo+=fxp&0x0F));
+ }
+ else if(fxp>0x00)
+ {
+ tempop=fxp;
+ for(int i=1;i<par->speed;++i)
+ par->f.tracks[0].eventList.push_back(MidiEvent::tempo(par->curmiditk+i*40,par->tempo-=fxp&0x0F));
+ }
+ }
+ }
+ break;
+ case 21://fine vib
+ par->chefxflg[ch][efx]=1;
+ //memory implementation is non-compliant
+ if(fxp)
+ par->chefxmem[ch][efx]=fxp;
+ else
+ {
+ //fxp=par->chefxmem[ch][efx];
+ //actually midi has us covered already,
+ //no need to save the memory manually.
+ break;
+ }
+ //vibrato rate is currently ignored, even it is supported on some synths as CC76
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::cc(par->curmiditk,0,1,(fxp&0x0f)*2));
+ break;
+ case 24://set pan
+ {
+ uint64_t param=fxp/2;
+ if(param>127)param=127;
+ if(!par->chinst[ch]||par->minstch.find(std::make_pair(ch,par->chinst[ch]))==par->minstch.end())
+ {
+ //set panning for all midi channels associated with this IT channel
+ }
+ else
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::cc(par->curmiditk,0,10,param));
+ }
+ break;
+ }
+ }
+ //stop non-active effects
+ for(int i=0;i<20;++i)
+ {
+ if((!(mask>>3&1)||((mask>>3&1)&&efx!=i))&&par->chefxflg[ch][i])
+ {
+ par->chefxflg[ch][i]=0;
+ switch(i)
+ {
+ case 8:
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::cc(par->curmiditk,0,1,0));
+ break;
+ }
+ }
+ }
+ if(mask>>0&1)//note
+ {
+ if(!inst)inst=par->chinst[ch];
+ if(note<120&&par->minstch.find(std::make_pair(ch,inst))==par->minstch.end())
+ {
+ printf("instr: %d @ ch %d note %d row %d\n",inst,ch,note,par->currow);
+ throw std::runtime_error("wtf instrument???");
+ }
+ if(par->chnote[ch]!=255&&note<120&&efx!=7)
+ par->f.tracks[par->minstch[std::make_pair(ch,previnst)]].
+ eventList.push_back(MidiEvent::noteOff(par->curmiditk,0,par->chnote[ch]));
+ if(note>=120)
+ {
+ if(par->chnote[ch]<128)
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::noteOff(par->curmiditk,0,par->chnote[ch]));
+ par->chnote[ch]=255;
+ }
+ else
+ {
+ if(efx!=7)
+ {
+ par->chnote[ch]=note;par->chage[ch]=0;
+ par->f.tracks[par->minstch[std::make_pair(ch,par->chinst[ch])]].
+ eventList.push_back(MidiEvent::noteOn(par->curmiditk+notedelay,0,note,par->chvelm[ch]));
+ }
+ else
+ {
+ par->chportadstnote[ch]=note;
+ if(debuggxx)printf("dst->%d\n",par->chportadstnote[ch]);
+ }
+ }
+ }
+ }
+ };
+ public:
+ //Converts the module held in _c to MIDI and writes the result to disk.
+ // _c                parsed module; kept by reference in member c.
+ // path              output file name. In single_instr_mode it is used as a
+ //                   prefix: one "<path>.<instr>.mid" is written per instrument.
+ // single_instr_mode when true no track is pruned and f.writeFile is called
+ //                   once per instrument with track 0 plus that instrument's
+ //                   tracks; otherwise a single file is written to path.
+ //Two ITPlayer passes are made over the module, the first with an
+ //ITCellActionPre and the second with an ITCellActionConv.
+ //NOTE(review): both passes appear to run inside ITPlayer's constructor (the
+ //first player is deleted right after construction) -- confirm.
+ ITConverter(ITContainer &_c,const char* path,bool single_instr_mode=false):c(_c)
+ {
+     //pass 1. minstch is enumerated right after it, so this pass presumably
+     //collects the (channel,instrument) pairs in use -- TODO confirm.
+     ITCellActionPre* cap=new ITCellActionPre(this);
+     for(int i=0;i<64;++i)chinst[i]=1;
+     p=new ITPlayer(c,cap);
+     delete p;delete cap;
+     //track 0 is reserved for song title, speed changes etc
+     int trcnt=1;
+     for(auto &i:minstch)
+     {
+         i.second=trcnt++;
+         printf("ch %d, inst %d -> %d\n",i.first.first,i.first.second,i.second);
+     }
+     //reset per-channel state before the real conversion pass
+     for(int i=0;i<64;++i)
+     {
+         chvolm[i]=100;
+         chvelm[i]=100;
+         chpanm[i]=64;
+         chnote[i]=255;//255 is treated as "no note sounding" below
+         chinst[i]=1;
+     }
+     speed=c.initspeed,tempo=c.inittempo;
+     currow=curmiditk=0;
+     f.divs=960;
+     f.tracks.push_back(MidiTrack());
+     //one track per (channel,instrument) pair, in the same map order that
+     //was used to hand out the track numbers above
+     for(auto i=minstch.begin();i!=minstch.end();++i)
+     {
+         f.tracks.push_back(MidiTrack());
+         char buf[48];
+         snprintf(buf,sizeof(buf),"%s @ ch%d",c.instr[i->first.second].name.c_str(),i->first.first);
+         f.tracks.back().eventList.push_back(MidiEvent::trkname(0,buf));
+         //CC 0x65/0x64 = 0/0 selects RPN 0 (pitch bend sensitivity) and
+         //CC 0x06 (data entry) sets it to the instrument's bend range
+         f.tracks.back().eventList.push_back(MidiEvent::cc(0,0,0x65,0));
+         f.tracks.back().eventList.push_back(MidiEvent::cc(0,0,0x64,0));
+         f.tracks.back().eventList.push_back(MidiEvent::cc(0,0,0x06,c.instr[i->first.second].instrpbrange));
+     }
+     f.tracks[0].eventList.push_back(MidiEvent::trkname(0,c.title.c_str()));
+     f.tracks[0].eventList.push_back(MidiEvent::tsig(0,1,2));
+     f.tracks[0].eventList.push_back(MidiEvent::tempo(0,tempo));
+     //pass 2: the actual conversion.
+     //NOTE(review): if this pass throws (ITCellActionConv can throw
+     //std::runtime_error) cac is never freed.
+     ITCellActionConv* cac=new ITCellActionConv(this);
+     p=new ITPlayer(c,cac);
+     for(int i=0;i<64;++i)//turn off any remaining notes
+     {
+         if(chnote[i]==255)continue;
+         //look the pair up instead of using operator[]: operator[] would
+         //insert a bogus (channel,instrument)->0 entry and put the note-off
+         //on the conductor track when the pair has no track of its own
+         auto trk=minstch.find(std::make_pair(i,chinst[i]));
+         if(trk==minstch.end())continue;
+         f.tracks[trk->second].
+         eventList.push_back(MidiEvent::noteOff(curmiditk,0,chnote[i]));
+     }
+     //f.dumpFile();
+     //drop tracks (never track 0) that contain no 0x9n event. Skipped in
+     //single_instr_mode because minstch's track indices are still needed there.
+     for(auto i=f.tracks.begin()+1;i!=f.tracks.end();)
+     {
+         bool hasnoteon=false;
+         for(auto &j:i->eventList)
+             if((j.type&0xF0)==0x90){hasnoteon=true;break;}
+         //erase() invalidates i and everything after it; continue from the
+         //iterator it returns so that no track is skipped or read past end()
+         if(!hasnoteon&&!single_instr_mode)i=f.tracks.erase(i);
+         else ++i;
+     }
+     printf("final file has %zu tracks.\n",f.tracks.size());
+     if(single_instr_mode)
+     {
+         for(int instr=1;instr<=c.cins;++instr)
+         {
+             //track 0 is always included; then every track mapped to instr
+             std::vector<size_t> tr={0};
+             for(auto &pr:minstch)
+                 if(pr.first.second==instr && pr.second != 0)
+                     tr.push_back(pr.second);
+             if(tr.size()==1)
+             {
+                 printf("instr #%d seems unused, skipped\n",instr);
+                 continue;
+             }
+             f.writeFile((std::string(path)+"."+std::to_string(instr)+".mid").c_str(),tr);
+         }
+     }
+     else f.writeFile(path);
+     delete p;delete cac;
+ }
+};
+ //Entry point: loads a module and converts it to a MIDI file.
+ //usage: <program> [input [output]]
+ //input defaults to "modulefile.it" and output to "output.mid", which are the
+ //paths that were previously hard-coded.
+ //Returns 0 on success, 1 if loading or conversion throws a std::exception
+ //(the converter throws std::runtime_error for an unmapped instrument).
+ int main(int argc,char **argv)
+ {
+     const char *inpath=argc>1?argv[1]:"modulefile.it";
+     const char *outpath=argc>2?argv[2]:"output.mid";
+     try
+     {
+         ITContainer it(inpath);
+         ITConverter itc(it,outpath);
+     }
+     catch(const std::exception &e)
+     {
+         //report instead of letting the exception escape main and abort
+         fprintf(stderr,"conversion failed: %s\n",e.what());
+         return 1;
+     }
+     return 0;
+ }