diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/CMakeLists.txt | 18 | ||||
-rw-r--r-- | tests/deduper_legacy.cpp | 194 |
2 files changed, 3 insertions, 209 deletions
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 78ad4fe..fa76ab9 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,3 +1,6 @@ +get_target_property(xsig_priv_incdir xsig INCLUDE_DIRECTORIES) +include_directories(compressed_vector ${xsig_priv_incdir}) + add_executable(compressed_vector compressed_vector.cpp) target_link_libraries(compressed_vector xsig @@ -20,18 +23,8 @@ target_link_libraries(image_util_tests add_executable(signature_test signature_test.cpp) target_link_libraries(signature_test xsig - opencv_core - opencv_imgcodecs - opencv_imgproc ) -#add_executable(deduper_legacy deduper_legacy.cpp) -#target_link_libraries(deduper_legacy -# ${OpenCV_LIBS} -# ${CMAKE_THREAD_LIBS_INIT} -# xsig -#) - add_executable(testdrive testdrive.cpp) target_link_libraries(testdrive xsig @@ -47,11 +40,6 @@ endif() add_executable(testdrive_sqlite testdrive_sqlite.cpp) target_link_libraries(testdrive_sqlite xsig - opencv_core - opencv_imgcodecs - opencv_imgproc - ${SQLite3_LIBRARIES} - ${CMAKE_THREAD_LIBS_INIT} ) if(WIN32) target_link_libraries(testdrive_sqlite shell32 kernel32) diff --git a/tests/deduper_legacy.cpp b/tests/deduper_legacy.cpp deleted file mode 100644 index bcd8514..0000000 --- a/tests/deduper_legacy.cpp +++ /dev/null @@ -1,194 +0,0 @@ -#include "signature.hpp" - -#include <cstdio> -#include <cstring> - -#include <filesystem> -#include <string> -#include <unordered_map> -#include <utility> -#include <vector> - -#include <getopt.h> - -#include "thread_pool.hpp" - -int ctr; -int recursive; -int njobs=1; -double threshold=0.3; -std::vector<std::string> paths; - -int parse_arguments(int argc,char **argv) -{ - recursive=0; - int help=0; - option longopt[]= - { - {"recursive",no_argument ,&recursive,1}, -// {"destdir" ,required_argument,0 ,'D'}, - {"jobs" ,required_argument,0 ,'j'}, - {"threshold",required_argument,0 ,'d'}, - {"help" ,no_argument ,&help ,1}, - {0 ,0 ,0 ,0} - }; - while(1) - { - int idx=0; - int c=getopt_long(argc,argv,"rhj:d:",longopt,&idx); - if(!~c)break; - switch(c) - { - case 0: - if(longopt[idx].flag)break; - if(std::string("jobs")==longopt[idx].name) - sscanf(optarg,"%d",&njobs); - if(std::string("threshold")==longopt[idx].name) - sscanf(optarg,"%lf",&threshold); - break; - case 'r': - recursive=1; - break; - case 'h': - help=1; - break; - case 'j': - sscanf(optarg,"%d",&njobs); - break; - case 'd': - sscanf(optarg,"%lf",&threshold); - break; - } - } - for(;optind<argc;++optind) - paths.push_back(argv[optind]); - if(help||argc<2) - { - printf( - "Usage: %s [OPTION] PATH...\n" - "Detect potentially duplicate images in PATHs and optionally perform an action on them.\n\n" - " -h, --help Display this help message and exit.\n" - " -r, --recursive Recurse into all directories.\n" - " -j, --jobs Number of concurrent tasks to run at once.\n" - " -d, --threshold Threshold distance below which images will be considered similar.\n" - ,argv[0] - ); - return 1; - } - if(threshold>1||threshold<0) - { - puts("Invalid threshold value."); - return 2; - } - if(threshold<1e-6)threshold=1e-6; - if(!paths.size()) - { - puts("Missing image path."); - return 2; - } - return 0; -} - -void build_file_list(std::filesystem::path path,bool recursive,std::vector<std::string>&out) -{ - if(recursive) - { - auto dirit=std::filesystem::recursive_directory_iterator(path); - for(auto &p:dirit) - { - FILE* fp=fopen(p.path().c_str(),"r"); - char c[8]; - fread((void*)c,1,6,fp); - if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3)) - out.push_back(p.path().string()); - fclose(fp); - } - } - else - { - auto dirit=std::filesystem::directory_iterator(path); - for(auto &p:dirit) - { - FILE* fp=fopen(p.path().c_str(),"r"); - char c[8]; - fread((void*)c,1,6,fp); - if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3)) - out.push_back(p.path().string()); - fclose(fp); - } - } -} - -void compute_signature_vectors(const std::vector<std::string>&files,std::vector<signature>&output) -{ - thread_pool tp(njobs); - for(size_t i=0;i<files.size();++i) - { - auto job_func=[&](int thid,size_t id){ - fprintf(stderr,"spawned: on thread#%d, file#%lu (%s)\n",thid,id,files[id].c_str()); - output[id]=signature::from_file(files[id].c_str()); - fprintf(stderr,"done: file#%lu\n",id); - output[id].length(); - printf("%d/%lu\r",++ctr,files.size()); - fflush(stdout); - }; - tp.create_task(job_func,i); - } - tp.wait(); -} - -void compare_signature_vectors(const std::vector<signature>&vec,std::vector<std::tuple<size_t,size_t,double>>&out) -{ - thread_pool tp(njobs); - for(size_t i=0;i<vec.size();++i){if (vec[i].length() < 0) continue; - for(size_t j=i+1;j<vec.size();++j) - { - if (vec[j].length() < 0) continue; - auto job_func=[&](int thid,size_t ida,size_t idb){ - fprintf(stderr,"spawned: on thread#%d, file#%lu<->file#%lu\n",thid,ida,idb); - if(true) - { - double d=vec[ida].distance(vec[idb]); - double l=vec[ida].length()+vec[idb].length(); - d/=l; - if(d<threshold)out.emplace_back(ida,idb,d); - fprintf(stderr,"done:file#%lu<->file#%lu: %lf\n",ida,idb,d); - } - printf("%d/%lu\r",++ctr,vec.size()*(vec.size()-1)/2); - fflush(stdout); - }; - tp.create_task(job_func,i,j); - }} - tp.wait(); -} - -int main(int argc,char** argv) -{ - if(int pr=parse_arguments(argc,argv))return pr-1; - puts("building list of files to compare..."); - std::vector<std::string> x; - for(auto&p:paths) - build_file_list(p,recursive,x); - printf("%lu files to compare.\n",x.size()); - puts("computing signature vectors..."); - std::vector<signature> cvecs; - cvecs.resize(x.size()); - compute_signature_vectors(x,cvecs); - /*for(auto &v:cvecs) - { - fprintf(stderr,"%lu:",v.sizeof_vec); - for(size_t i=0;i<v.sizeof_vec;++i) - fprintf(stderr," %d",v.vec[i]); - fprintf(stderr,"\n"); - }*/ - ctr=0; - puts("\ncomparing signature vectors..."); - std::vector<std::tuple<size_t,size_t,double>> r; - compare_signature_vectors(cvecs,r); - puts(""); - for(auto &t:r) - printf("%s<->%s: %lf\n",x[std::get<0>(t)].c_str(),x[std::get<1>(t)].c_str(),std::get<2>(t)); - printf("%lu similar images.",r.size()); - cvecs.clear(); - return 0; -} |