diff options
-rw-r--r-- | CMakeLists.txt | 14 | ||||
-rw-r--r-- | tests/CMakeLists.txt | 18 | ||||
-rw-r--r-- | tests/deduper_legacy.cpp | 194 | ||||
-rw-r--r-- | xsig/CMakeLists.txt | 22 | ||||
-rw-r--r-- | xsig/include/signature.hpp (renamed from signature.hpp) | 0 | ||||
-rw-r--r-- | xsig/include/signature_db.hpp (renamed from signature_db.hpp) | 0 | ||||
-rw-r--r-- | xsig/include/subslice_signature.hpp (renamed from subslice_signature.hpp) | 0 | ||||
-rw-r--r-- | xsig/src/base64.cpp (renamed from base64.cpp) | 0 | ||||
-rw-r--r-- | xsig/src/base64.hpp (renamed from base64.hpp) | 0 | ||||
-rw-r--r-- | xsig/src/compressed_vector.hpp (renamed from compressed_vector.hpp) | 0 | ||||
-rw-r--r-- | xsig/src/imageutil.cpp (renamed from imageutil.cpp) | 0 | ||||
-rw-r--r-- | xsig/src/imageutil.hpp (renamed from imageutil.hpp) | 0 | ||||
-rw-r--r-- | xsig/src/signature.cpp (renamed from signature.cpp) | 0 | ||||
-rw-r--r-- | xsig/src/signature_db.cpp (renamed from signature_db.cpp) | 0 | ||||
-rw-r--r-- | xsig/src/subslice_signature.cpp (renamed from subslice_signature.cpp) | 0 | ||||
-rw-r--r-- | xsig/src/thread_pool.hpp (renamed from thread_pool.hpp) | 0 |
16 files changed, 28 insertions, 220 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 6536a47..f535cf3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,18 +15,10 @@ SET(CMAKE_EXTRA_INCLUDE_FILES "filesystem") check_type_size("std::filesystem::path::value_type" PATH_VALSIZE LANGUAGE CXX) SET(CMAKE_EXTRA_INCLUDE_FILES) -add_compile_definitions(PATH_VALSIZE=${PATH_VALSIZE}) - -include_directories(.) +option(BUILD_SHARED_LIBS ON) -add_library(xsig STATIC - imageutil.cpp - signature.cpp - subslice_signature.cpp - signature_db.cpp - base64.cpp -) +add_compile_definitions(PATH_VALSIZE=${PATH_VALSIZE}) -target_compile_options(xsig PRIVATE -Werror=return-type) +add_subdirectory(xsig) add_subdirectory(tests) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 78ad4fe..fa76ab9 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,3 +1,6 @@ +get_target_property(xsig_priv_incdir xsig INCLUDE_DIRECTORIES) +include_directories(compressed_vector ${xsig_priv_incdir}) + add_executable(compressed_vector compressed_vector.cpp) target_link_libraries(compressed_vector xsig @@ -20,18 +23,8 @@ target_link_libraries(image_util_tests add_executable(signature_test signature_test.cpp) target_link_libraries(signature_test xsig - opencv_core - opencv_imgcodecs - opencv_imgproc ) -#add_executable(deduper_legacy deduper_legacy.cpp) -#target_link_libraries(deduper_legacy -# ${OpenCV_LIBS} -# ${CMAKE_THREAD_LIBS_INIT} -# xsig -#) - add_executable(testdrive testdrive.cpp) target_link_libraries(testdrive xsig @@ -47,11 +40,6 @@ endif() add_executable(testdrive_sqlite testdrive_sqlite.cpp) target_link_libraries(testdrive_sqlite xsig - opencv_core - opencv_imgcodecs - opencv_imgproc - ${SQLite3_LIBRARIES} - ${CMAKE_THREAD_LIBS_INIT} ) if(WIN32) target_link_libraries(testdrive_sqlite shell32 kernel32) diff --git a/tests/deduper_legacy.cpp b/tests/deduper_legacy.cpp deleted file mode 100644 index bcd8514..0000000 --- a/tests/deduper_legacy.cpp +++ /dev/null @@ -1,194 +0,0 @@ -#include "signature.hpp" - -#include <cstdio> -#include <cstring> - -#include <filesystem> -#include <string> -#include <unordered_map> -#include <utility> -#include <vector> - -#include <getopt.h> - -#include "thread_pool.hpp" - -int ctr; -int recursive; -int njobs=1; -double threshold=0.3; -std::vector<std::string> paths; - -int parse_arguments(int argc,char **argv) -{ - recursive=0; - int help=0; - option longopt[]= - { - {"recursive",no_argument ,&recursive,1}, -// {"destdir" ,required_argument,0 ,'D'}, - {"jobs" ,required_argument,0 ,'j'}, - {"threshold",required_argument,0 ,'d'}, - {"help" ,no_argument ,&help ,1}, - {0 ,0 ,0 ,0} - }; - while(1) - { - int idx=0; - int c=getopt_long(argc,argv,"rhj:d:",longopt,&idx); - if(!~c)break; - switch(c) - { - case 0: - if(longopt[idx].flag)break; - if(std::string("jobs")==longopt[idx].name) - sscanf(optarg,"%d",&njobs); - if(std::string("threshold")==longopt[idx].name) - sscanf(optarg,"%lf",&threshold); - break; - case 'r': - recursive=1; - break; - case 'h': - help=1; - break; - case 'j': - sscanf(optarg,"%d",&njobs); - break; - case 'd': - sscanf(optarg,"%lf",&threshold); - break; - } - } - for(;optind<argc;++optind) - paths.push_back(argv[optind]); - if(help||argc<2) - { - printf( - "Usage: %s [OPTION] PATH...\n" - "Detect potentially duplicate images in PATHs and optionally perform an action on them.\n\n" - " -h, --help Display this help message and exit.\n" - " -r, --recursive Recurse into all directories.\n" - " -j, --jobs Number of concurrent tasks to run at once.\n" - " -d, --threshold Threshold distance below which images will be considered similar.\n" - ,argv[0] - ); - return 1; - } - if(threshold>1||threshold<0) - { - puts("Invalid threshold value."); - return 2; - } - if(threshold<1e-6)threshold=1e-6; - if(!paths.size()) - { - puts("Missing image path."); - return 2; - } - return 0; -} - -void build_file_list(std::filesystem::path path,bool recursive,std::vector<std::string>&out) -{ - if(recursive) - { - auto dirit=std::filesystem::recursive_directory_iterator(path); - for(auto &p:dirit) - { - FILE* fp=fopen(p.path().c_str(),"r"); - char c[8]; - fread((void*)c,1,6,fp); - if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3)) - out.push_back(p.path().string()); - fclose(fp); - } - } - else - { - auto dirit=std::filesystem::directory_iterator(path); - for(auto &p:dirit) - { - FILE* fp=fopen(p.path().c_str(),"r"); - char c[8]; - fread((void*)c,1,6,fp); - if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3)) - out.push_back(p.path().string()); - fclose(fp); - } - } -} - -void compute_signature_vectors(const std::vector<std::string>&files,std::vector<signature>&output) -{ - thread_pool tp(njobs); - for(size_t i=0;i<files.size();++i) - { - auto job_func=[&](int thid,size_t id){ - fprintf(stderr,"spawned: on thread#%d, file#%lu (%s)\n",thid,id,files[id].c_str()); - output[id]=signature::from_file(files[id].c_str()); - fprintf(stderr,"done: file#%lu\n",id); - output[id].length(); - printf("%d/%lu\r",++ctr,files.size()); - fflush(stdout); - }; - tp.create_task(job_func,i); - } - tp.wait(); -} - -void compare_signature_vectors(const std::vector<signature>&vec,std::vector<std::tuple<size_t,size_t,double>>&out) -{ - thread_pool tp(njobs); - for(size_t i=0;i<vec.size();++i){if (vec[i].length() < 0) continue; - for(size_t j=i+1;j<vec.size();++j) - { - if (vec[j].length() < 0) continue; - auto job_func=[&](int thid,size_t ida,size_t idb){ - fprintf(stderr,"spawned: on thread#%d, file#%lu<->file#%lu\n",thid,ida,idb); - if(true) - { - double d=vec[ida].distance(vec[idb]); - double l=vec[ida].length()+vec[idb].length(); - d/=l; - if(d<threshold)out.emplace_back(ida,idb,d); - fprintf(stderr,"done:file#%lu<->file#%lu: %lf\n",ida,idb,d); - } - printf("%d/%lu\r",++ctr,vec.size()*(vec.size()-1)/2); - fflush(stdout); - }; - tp.create_task(job_func,i,j); - }} - tp.wait(); -} - -int main(int argc,char** argv) -{ - if(int pr=parse_arguments(argc,argv))return pr-1; - puts("building list of files to compare..."); - std::vector<std::string> x; - for(auto&p:paths) - build_file_list(p,recursive,x); - printf("%lu files to compare.\n",x.size()); - puts("computing signature vectors..."); - std::vector<signature> cvecs; - cvecs.resize(x.size()); - compute_signature_vectors(x,cvecs); - /*for(auto &v:cvecs) - { - fprintf(stderr,"%lu:",v.sizeof_vec); - for(size_t i=0;i<v.sizeof_vec;++i) - fprintf(stderr," %d",v.vec[i]); - fprintf(stderr,"\n"); - }*/ - ctr=0; - puts("\ncomparing signature vectors..."); - std::vector<std::tuple<size_t,size_t,double>> r; - compare_signature_vectors(cvecs,r); - puts(""); - for(auto &t:r) - printf("%s<->%s: %lf\n",x[std::get<0>(t)].c_str(),x[std::get<1>(t)].c_str(),std::get<2>(t)); - printf("%lu similar images.",r.size()); - cvecs.clear(); - return 0; -} diff --git a/xsig/CMakeLists.txt b/xsig/CMakeLists.txt new file mode 100644 index 0000000..47c1b81 --- /dev/null +++ b/xsig/CMakeLists.txt @@ -0,0 +1,22 @@ +set(xsig_SOURCES + src/base64.cpp + src/imageutil.cpp + src/signature.cpp + src/subslice_signature.cpp + src/signature_db.cpp +) + +add_library(xsig ${xsig_SOURCES}) + +target_include_directories(xsig PRIVATE ./src) +target_include_directories(xsig PUBLIC ./include) + +target_link_libraries(xsig PRIVATE + opencv_core + opencv_imgcodecs + opencv_imgproc + ${SQLite3_LIBRARIES} + ${CMAKE_THREAD_LIBS_INIT} +) + +target_compile_options(xsig PRIVATE -Werror=return-type) diff --git a/signature.hpp b/xsig/include/signature.hpp index b655e73..b655e73 100644 --- a/signature.hpp +++ b/xsig/include/signature.hpp diff --git a/signature_db.hpp b/xsig/include/signature_db.hpp index b37cf0a..b37cf0a 100644 --- a/signature_db.hpp +++ b/xsig/include/signature_db.hpp diff --git a/subslice_signature.hpp b/xsig/include/subslice_signature.hpp index 928d396..928d396 100644 --- a/subslice_signature.hpp +++ b/xsig/include/subslice_signature.hpp diff --git a/base64.cpp b/xsig/src/base64.cpp index 3dae3a2..3dae3a2 100644 --- a/base64.cpp +++ b/xsig/src/base64.cpp diff --git a/base64.hpp b/xsig/src/base64.hpp index 70d4e40..70d4e40 100644 --- a/base64.hpp +++ b/xsig/src/base64.hpp diff --git a/compressed_vector.hpp b/xsig/src/compressed_vector.hpp index 780a563..780a563 100644 --- a/compressed_vector.hpp +++ b/xsig/src/compressed_vector.hpp diff --git a/imageutil.cpp b/xsig/src/imageutil.cpp index 3fd8a94..3fd8a94 100644 --- a/imageutil.cpp +++ b/xsig/src/imageutil.cpp diff --git a/imageutil.hpp b/xsig/src/imageutil.hpp index f3831b0..f3831b0 100644 --- a/imageutil.hpp +++ b/xsig/src/imageutil.hpp diff --git a/signature.cpp b/xsig/src/signature.cpp index b912198..b912198 100644 --- a/signature.cpp +++ b/xsig/src/signature.cpp diff --git a/signature_db.cpp b/xsig/src/signature_db.cpp index 393b756..393b756 100644 --- a/signature_db.cpp +++ b/xsig/src/signature_db.cpp diff --git a/subslice_signature.cpp b/xsig/src/subslice_signature.cpp index 75b1a43..75b1a43 100644 --- a/subslice_signature.cpp +++ b/xsig/src/subslice_signature.cpp diff --git a/thread_pool.hpp b/xsig/src/thread_pool.hpp index 6aea4ec..6aea4ec 100644 --- a/thread_pool.hpp +++ b/xsig/src/thread_pool.hpp |