aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt14
-rw-r--r--tests/CMakeLists.txt18
-rw-r--r--tests/deduper_legacy.cpp194
-rw-r--r--xsig/CMakeLists.txt22
-rw-r--r--xsig/include/signature.hpp (renamed from signature.hpp)0
-rw-r--r--xsig/include/signature_db.hpp (renamed from signature_db.hpp)0
-rw-r--r--xsig/include/subslice_signature.hpp (renamed from subslice_signature.hpp)0
-rw-r--r--xsig/src/base64.cpp (renamed from base64.cpp)0
-rw-r--r--xsig/src/base64.hpp (renamed from base64.hpp)0
-rw-r--r--xsig/src/compressed_vector.hpp (renamed from compressed_vector.hpp)0
-rw-r--r--xsig/src/imageutil.cpp (renamed from imageutil.cpp)0
-rw-r--r--xsig/src/imageutil.hpp (renamed from imageutil.hpp)0
-rw-r--r--xsig/src/signature.cpp (renamed from signature.cpp)0
-rw-r--r--xsig/src/signature_db.cpp (renamed from signature_db.cpp)0
-rw-r--r--xsig/src/subslice_signature.cpp (renamed from subslice_signature.cpp)0
-rw-r--r--xsig/src/thread_pool.hpp (renamed from thread_pool.hpp)0
16 files changed, 28 insertions, 220 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6536a47..f535cf3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,18 +15,10 @@ SET(CMAKE_EXTRA_INCLUDE_FILES "filesystem")
check_type_size("std::filesystem::path::value_type" PATH_VALSIZE LANGUAGE CXX)
SET(CMAKE_EXTRA_INCLUDE_FILES)
-add_compile_definitions(PATH_VALSIZE=${PATH_VALSIZE})
-
-include_directories(.)
+option(BUILD_SHARED_LIBS ON)
-add_library(xsig STATIC
- imageutil.cpp
- signature.cpp
- subslice_signature.cpp
- signature_db.cpp
- base64.cpp
-)
+add_compile_definitions(PATH_VALSIZE=${PATH_VALSIZE})
-target_compile_options(xsig PRIVATE -Werror=return-type)
+add_subdirectory(xsig)
add_subdirectory(tests)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 78ad4fe..fa76ab9 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,3 +1,6 @@
+get_target_property(xsig_priv_incdir xsig INCLUDE_DIRECTORIES)
+include_directories(compressed_vector ${xsig_priv_incdir})
+
add_executable(compressed_vector compressed_vector.cpp)
target_link_libraries(compressed_vector
xsig
@@ -20,18 +23,8 @@ target_link_libraries(image_util_tests
add_executable(signature_test signature_test.cpp)
target_link_libraries(signature_test
xsig
- opencv_core
- opencv_imgcodecs
- opencv_imgproc
)
-#add_executable(deduper_legacy deduper_legacy.cpp)
-#target_link_libraries(deduper_legacy
-# ${OpenCV_LIBS}
-# ${CMAKE_THREAD_LIBS_INIT}
-# xsig
-#)
-
add_executable(testdrive testdrive.cpp)
target_link_libraries(testdrive
xsig
@@ -47,11 +40,6 @@ endif()
add_executable(testdrive_sqlite testdrive_sqlite.cpp)
target_link_libraries(testdrive_sqlite
xsig
- opencv_core
- opencv_imgcodecs
- opencv_imgproc
- ${SQLite3_LIBRARIES}
- ${CMAKE_THREAD_LIBS_INIT}
)
if(WIN32)
target_link_libraries(testdrive_sqlite shell32 kernel32)
diff --git a/tests/deduper_legacy.cpp b/tests/deduper_legacy.cpp
deleted file mode 100644
index bcd8514..0000000
--- a/tests/deduper_legacy.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-#include "signature.hpp"
-
-#include <cstdio>
-#include <cstring>
-
-#include <filesystem>
-#include <string>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-#include <getopt.h>
-
-#include "thread_pool.hpp"
-
-int ctr;
-int recursive;
-int njobs=1;
-double threshold=0.3;
-std::vector<std::string> paths;
-
-int parse_arguments(int argc,char **argv)
-{
- recursive=0;
- int help=0;
- option longopt[]=
- {
- {"recursive",no_argument ,&recursive,1},
-// {"destdir" ,required_argument,0 ,'D'},
- {"jobs" ,required_argument,0 ,'j'},
- {"threshold",required_argument,0 ,'d'},
- {"help" ,no_argument ,&help ,1},
- {0 ,0 ,0 ,0}
- };
- while(1)
- {
- int idx=0;
- int c=getopt_long(argc,argv,"rhj:d:",longopt,&idx);
- if(!~c)break;
- switch(c)
- {
- case 0:
- if(longopt[idx].flag)break;
- if(std::string("jobs")==longopt[idx].name)
- sscanf(optarg,"%d",&njobs);
- if(std::string("threshold")==longopt[idx].name)
- sscanf(optarg,"%lf",&threshold);
- break;
- case 'r':
- recursive=1;
- break;
- case 'h':
- help=1;
- break;
- case 'j':
- sscanf(optarg,"%d",&njobs);
- break;
- case 'd':
- sscanf(optarg,"%lf",&threshold);
- break;
- }
- }
- for(;optind<argc;++optind)
- paths.push_back(argv[optind]);
- if(help||argc<2)
- {
- printf(
- "Usage: %s [OPTION] PATH...\n"
- "Detect potentially duplicate images in PATHs and optionally perform an action on them.\n\n"
- " -h, --help Display this help message and exit.\n"
- " -r, --recursive Recurse into all directories.\n"
- " -j, --jobs Number of concurrent tasks to run at once.\n"
- " -d, --threshold Threshold distance below which images will be considered similar.\n"
- ,argv[0]
- );
- return 1;
- }
- if(threshold>1||threshold<0)
- {
- puts("Invalid threshold value.");
- return 2;
- }
- if(threshold<1e-6)threshold=1e-6;
- if(!paths.size())
- {
- puts("Missing image path.");
- return 2;
- }
- return 0;
-}
-
-void build_file_list(std::filesystem::path path,bool recursive,std::vector<std::string>&out)
-{
- if(recursive)
- {
- auto dirit=std::filesystem::recursive_directory_iterator(path);
- for(auto &p:dirit)
- {
- FILE* fp=fopen(p.path().c_str(),"r");
- char c[8];
- fread((void*)c,1,6,fp);
- if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3))
- out.push_back(p.path().string());
- fclose(fp);
- }
- }
- else
- {
- auto dirit=std::filesystem::directory_iterator(path);
- for(auto &p:dirit)
- {
- FILE* fp=fopen(p.path().c_str(),"r");
- char c[8];
- fread((void*)c,1,6,fp);
- if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3))
- out.push_back(p.path().string());
- fclose(fp);
- }
- }
-}
-
-void compute_signature_vectors(const std::vector<std::string>&files,std::vector<signature>&output)
-{
- thread_pool tp(njobs);
- for(size_t i=0;i<files.size();++i)
- {
- auto job_func=[&](int thid,size_t id){
- fprintf(stderr,"spawned: on thread#%d, file#%lu (%s)\n",thid,id,files[id].c_str());
- output[id]=signature::from_file(files[id].c_str());
- fprintf(stderr,"done: file#%lu\n",id);
- output[id].length();
- printf("%d/%lu\r",++ctr,files.size());
- fflush(stdout);
- };
- tp.create_task(job_func,i);
- }
- tp.wait();
-}
-
-void compare_signature_vectors(const std::vector<signature>&vec,std::vector<std::tuple<size_t,size_t,double>>&out)
-{
- thread_pool tp(njobs);
- for(size_t i=0;i<vec.size();++i){if (vec[i].length() < 0) continue;
- for(size_t j=i+1;j<vec.size();++j)
- {
- if (vec[j].length() < 0) continue;
- auto job_func=[&](int thid,size_t ida,size_t idb){
- fprintf(stderr,"spawned: on thread#%d, file#%lu<->file#%lu\n",thid,ida,idb);
- if(true)
- {
- double d=vec[ida].distance(vec[idb]);
- double l=vec[ida].length()+vec[idb].length();
- d/=l;
- if(d<threshold)out.emplace_back(ida,idb,d);
- fprintf(stderr,"done:file#%lu<->file#%lu: %lf\n",ida,idb,d);
- }
- printf("%d/%lu\r",++ctr,vec.size()*(vec.size()-1)/2);
- fflush(stdout);
- };
- tp.create_task(job_func,i,j);
- }}
- tp.wait();
-}
-
-int main(int argc,char** argv)
-{
- if(int pr=parse_arguments(argc,argv))return pr-1;
- puts("building list of files to compare...");
- std::vector<std::string> x;
- for(auto&p:paths)
- build_file_list(p,recursive,x);
- printf("%lu files to compare.\n",x.size());
- puts("computing signature vectors...");
- std::vector<signature> cvecs;
- cvecs.resize(x.size());
- compute_signature_vectors(x,cvecs);
- /*for(auto &v:cvecs)
- {
- fprintf(stderr,"%lu:",v.sizeof_vec);
- for(size_t i=0;i<v.sizeof_vec;++i)
- fprintf(stderr," %d",v.vec[i]);
- fprintf(stderr,"\n");
- }*/
- ctr=0;
- puts("\ncomparing signature vectors...");
- std::vector<std::tuple<size_t,size_t,double>> r;
- compare_signature_vectors(cvecs,r);
- puts("");
- for(auto &t:r)
- printf("%s<->%s: %lf\n",x[std::get<0>(t)].c_str(),x[std::get<1>(t)].c_str(),std::get<2>(t));
- printf("%lu similar images.",r.size());
- cvecs.clear();
- return 0;
-}
diff --git a/xsig/CMakeLists.txt b/xsig/CMakeLists.txt
new file mode 100644
index 0000000..47c1b81
--- /dev/null
+++ b/xsig/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(xsig_SOURCES
+ src/base64.cpp
+ src/imageutil.cpp
+ src/signature.cpp
+ src/subslice_signature.cpp
+ src/signature_db.cpp
+)
+
+add_library(xsig ${xsig_SOURCES})
+
+target_include_directories(xsig PRIVATE ./src)
+target_include_directories(xsig PUBLIC ./include)
+
+target_link_libraries(xsig PRIVATE
+ opencv_core
+ opencv_imgcodecs
+ opencv_imgproc
+ ${SQLite3_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT}
+)
+
+target_compile_options(xsig PRIVATE -Werror=return-type)
diff --git a/signature.hpp b/xsig/include/signature.hpp
index b655e73..b655e73 100644
--- a/signature.hpp
+++ b/xsig/include/signature.hpp
diff --git a/signature_db.hpp b/xsig/include/signature_db.hpp
index b37cf0a..b37cf0a 100644
--- a/signature_db.hpp
+++ b/xsig/include/signature_db.hpp
diff --git a/subslice_signature.hpp b/xsig/include/subslice_signature.hpp
index 928d396..928d396 100644
--- a/subslice_signature.hpp
+++ b/xsig/include/subslice_signature.hpp
diff --git a/base64.cpp b/xsig/src/base64.cpp
index 3dae3a2..3dae3a2 100644
--- a/base64.cpp
+++ b/xsig/src/base64.cpp
diff --git a/base64.hpp b/xsig/src/base64.hpp
index 70d4e40..70d4e40 100644
--- a/base64.hpp
+++ b/xsig/src/base64.hpp
diff --git a/compressed_vector.hpp b/xsig/src/compressed_vector.hpp
index 780a563..780a563 100644
--- a/compressed_vector.hpp
+++ b/xsig/src/compressed_vector.hpp
diff --git a/imageutil.cpp b/xsig/src/imageutil.cpp
index 3fd8a94..3fd8a94 100644
--- a/imageutil.cpp
+++ b/xsig/src/imageutil.cpp
diff --git a/imageutil.hpp b/xsig/src/imageutil.hpp
index f3831b0..f3831b0 100644
--- a/imageutil.hpp
+++ b/xsig/src/imageutil.hpp
diff --git a/signature.cpp b/xsig/src/signature.cpp
index b912198..b912198 100644
--- a/signature.cpp
+++ b/xsig/src/signature.cpp
diff --git a/signature_db.cpp b/xsig/src/signature_db.cpp
index 393b756..393b756 100644
--- a/signature_db.cpp
+++ b/xsig/src/signature_db.cpp
diff --git a/subslice_signature.cpp b/xsig/src/subslice_signature.cpp
index 75b1a43..75b1a43 100644
--- a/subslice_signature.cpp
+++ b/xsig/src/subslice_signature.cpp
diff --git a/thread_pool.hpp b/xsig/src/thread_pool.hpp
index 6aea4ec..6aea4ec 100644
--- a/thread_pool.hpp
+++ b/xsig/src/thread_pool.hpp