// Command main hashes every file below -directory, drops files whose SHA-512
// matches one already seen, and, unless -dry-run is set, copies the remaining
// files into -target under YYYY/MM/YYYYMMDD_hhmmss<ext>, a name derived from
// the DateTimeOriginal value of the parsed metadata.
package main

import (
	"crypto/sha512"
	"encoding/base64"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"path"
	"path/filepath"
	"sync"
	"time"

	"github.com/tajtiattila/metadata"
)

// Command-line flags.
var (
	directory = flag.String("directory", "", "path of the source directory")
	workers   = flag.Int("workers", 10, "number of workers to analyze and copy files")
	dryRun    = flag.Bool("dry-run", true, "don't create directories or files")
	target    = flag.String("target", "", "path to the target directory")
)

// picture is the analysis result for a single source file.
type picture struct {
	path  string             // location of the source file
	hash  string             // base64 encoded SHA-512 of the file content
	meta  *metadata.Metadata // parsed metadata; nil when err is set
	err   error              // first error hit while analysing the file
	mtime time.Time          // DateTimeOriginal; applied to the copied file
}

// main validates the flags, analyses all files below -directory with a pool
// of workers, prints the statistics and, unless -dry-run is set, copies the
// unique pictures into -target.
func main() {
	flag.Parse()
	if *directory == "" {
		log.Fatalf("no directory given")
	}
	// A non-positive count starts no worker at all, which would block the
	// directory walk forever on its first send.
	if *workers < 1 {
		log.Fatalf("workers must be at least 1, got %d", *workers)
	}
	if !*dryRun && *target == "" {
		log.Fatalf("dry run is disabled but no target directory given")
	}

	input := make(chan string)
	output := make(chan picture)

	var wg sync.WaitGroup
	wg.Add(*workers)
	for i := 0; i < *workers; i++ {
		go func() {
			defer wg.Done()
			workPictures(input, output)
		}()
	}

	// The collector goroutine is the only writer of targets and stats; main
	// reads them only after collected has been closed.
	var (
		targets map[string][]picture
		stats   map[string]int
	)
	collected := make(chan struct{})
	go func() {
		defer close(collected)
		targets, stats = collectPictures(output)
	}()

	err := filepath.Walk(*directory, func(file string, info os.FileInfo, err error) error {
		if err != nil {
			// info may be nil here (e.g. the root does not exist), so it
			// must not be touched.
			return err
		}
		if info.IsDir() {
			return nil
		}
		input <- file
		return nil
	})
	if err != nil {
		log.Fatalf("could not walk over directory: %s", err)
	}

	// Shut the pipeline down in order: no more input, workers drain and
	// exit, no more output, collector finishes its last picture.
	close(input)
	wg.Wait()
	close(output)
	<-collected

	for _, key := range []string{"created", "duplicate", "error"} {
		fmt.Printf("%s: %d\n", key, stats[key])
	}

	if *dryRun {
		return
	}

	log.Printf("copying files")
	if err := copyTargets(*target, targets); err != nil {
		log.Fatal(err)
	}
}

// collectPictures consumes analysed pictures until output is closed.
//
// It returns the unique pictures grouped by their slash-separated,
// extension-less target name ("2006/01/20060102_150405" layout applied to
// DateTimeOriginal) and the counters "created", "duplicate" and "error".
// Pictures with an error or without a hash are logged and counted as errors;
// pictures whose hash was already seen are logged and counted as duplicates.
func collectPictures(output <-chan picture) (map[string][]picture, map[string]int) {
	stats := map[string]int{
		"created":   0,
		"duplicate": 0,
		"error":     0,
	}
	targets := map[string][]picture{}
	// seen maps a content hash to the first picture that produced it.
	seen := map[string]picture{}

	for pic := range output {
		if pic.err != nil {
			log.Printf("could not handle %s: %s", pic.path, pic.err)
			stats["error"]++
			continue
		}
		if pic.hash == "" {
			log.Printf("no hash for %s", pic.path)
			stats["error"]++
			continue
		}
		if org, found := seen[pic.hash]; found {
			log.Printf("%s is duplicate of %s", pic.path, org.path)
			stats["duplicate"]++
			continue
		}
		seen[pic.hash] = pic
		stats["created"]++

		// NOTE(review): a picture whose metadata carries no original
		// timestamp is filed under whatever DateTimeOriginal formats to in
		// that case - confirm that this is the desired behaviour.
		pic.mtime = pic.meta.DateTimeOriginal.Time
		key := pic.meta.DateTimeOriginal.Format("2006/01/20060102_150405")
		targets[key] = append(targets[key], pic)
	}
	return targets, stats
}

// copyTargets copies every collected picture below root and stops at the
// first failure. When several pictures share one target name, an "_<index>"
// suffix keeps them apart.
func copyTargets(root string, targets map[string][]picture) error {
	for name, sources := range targets {
		for i, source := range sources {
			newpath := targetPath(root, name, source.path, i, len(sources))
			if err := copyFile(source.path, newpath, source.mtime); err != nil {
				return fmt.Errorf("could not copy file from %s to %s: %w", source.path, newpath, err)
			}
		}
	}
	return nil
}

// targetPath returns the destination for source below root.
//
// name is the slash-separated, extension-less target name. The extension is
// taken from source itself, so pictures of different types that share a name
// each keep their own extension. index is appended as "_<index>" only when
// total is greater than one.
func targetPath(root, name, source string, index, total int) string {
	// name is always in slash form, independent of the operating system.
	dir, base := path.Split(name)
	if total > 1 {
		base = fmt.Sprintf("%s_%d", base, index)
	}
	return filepath.Join(root, filepath.FromSlash(dir), base+filepath.Ext(source))
}

// workPictures analyses every path received on input and sends one picture
// per path to output. It returns once input is closed.
func workPictures(input chan string, output chan picture) {
	for pic := range input {
		getPicture(pic, output)
	}
}

// getPicture analyses the file at path and sends exactly one picture to
// output; failures are reported through the picture's err field.
func getPicture(path string, output chan picture) {
	// The file is already closed when the result is sent, so no descriptor
	// is held while the send blocks.
	output <- loadPicture(path)
}

// loadPicture opens file, computes the SHA-512 of its content and parses its
// metadata. On failure the returned picture has err set; hash is only
// populated when the content could be read completely.
func loadPicture(file string) picture {
	p := picture{path: file}

	f, err := os.Open(file)
	if err != nil {
		p.err = err
		return p
	}
	defer f.Close()

	sum := sha512.New()
	if _, err := io.Copy(sum, f); err != nil {
		p.err = fmt.Errorf("could not get hash sum: %w", err)
		return p
	}
	p.hash = base64.StdEncoding.EncodeToString(sum.Sum(nil))

	// Hashing consumed the file; rewind before parsing the metadata.
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		p.err = fmt.Errorf("could not seek in file: %w", err)
		return p
	}

	meta, err := metadata.Parse(f)
	if err != nil {
		p.err = fmt.Errorf("could not parse metadata: %w", err)
		return p
	}
	p.meta = meta
	return p
}

// copyFile copies source to target, creating the parent directories of
// target as needed, and then sets the access and modification time of target
// to mTime. An existing target is truncated.
func copyFile(source, target string, mTime time.Time) error {
	if err := os.MkdirAll(filepath.Dir(target), os.FileMode(0755)); err != nil {
		return fmt.Errorf("could not create directory: %w", err)
	}

	sf, err := os.Open(source)
	if err != nil {
		return fmt.Errorf("could not open source file %s: %w", source, err)
	}
	defer sf.Close()

	tf, err := os.Create(target)
	if err != nil {
		return fmt.Errorf("could not open target file %s: %w", target, err)
	}

	if _, err := io.Copy(tf, sf); err != nil {
		tf.Close() // the copy error is the one worth reporting
		return fmt.Errorf("could not copy source %s to target %s: %w", source, target, err)
	}
	// Close before touching the timestamps and report a failing close, as
	// it can signal that the data did not reach the disk.
	if err := tf.Close(); err != nil {
		return fmt.Errorf("could not close target file %s: %w", target, err)
	}

	if err := os.Chtimes(target, mTime, mTime); err != nil {
		return fmt.Errorf("could not set mtime of target file '%s': %w", target, err)
	}
	return nil
}