commit 875636e3d93ef10560b111f67868e8f08eba3284
Author: Gibheer
Date:   Sat Mar 20 22:09:42 2021 +0100

    initial commit

diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..83fd2b9
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,8 @@
+module picsort
+
+go 1.14
+
+require (
+	github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd
+	github.com/tajtiattila/metadata v0.0.0-20180130123038-1ef25f4c37ea
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..cb4019d
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,4 @@
+github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd h1:CmH9+J6ZSsIjUK3dcGsnCnO41eRBOnY12zwkn5qVwgc=
+github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk=
+github.com/tajtiattila/metadata v0.0.0-20180130123038-1ef25f4c37ea h1:BX1nmckcdR9xcJ0J09URwfhP1pQy97TbD+J+qUbzaPo=
+github.com/tajtiattila/metadata v0.0.0-20180130123038-1ef25f4c37ea/go.mod h1:qZzqptgLD1Lrl8lLbmFmQbVlu8kM1lOBuWVtfI1OTec=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..0d0925a
--- /dev/null
+++ b/main.go
@@ -0,0 +1,242 @@
+// Command picsort scans a directory tree for pictures, drops duplicates by
+// content hash and copies the remaining files into a target directory that is
+// laid out by the capture time found in the picture metadata.
+package main
+
+import (
+	"crypto/sha512"
+	"encoding/base64"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"path/filepath"
+	"sync"
+	"time"
+
+	"github.com/tajtiattila/metadata"
+)
+
+var (
+	directory = flag.String("directory", "", "path of the source directory")
+	workers   = flag.Int("workers", 10, "number of workers to analyze and copy files")
+	dryRun    = flag.Bool("dry-run", true, "don't create directories or files")
+	target    = flag.String("target", "", "path to the target directory")
+)
+
+type (
+	// picture is the analysis result for a single file.
+	picture struct {
+		// path is the location of the source file.
+		path string
+		// hash is the base64 encoded SHA-512 sum of the file content.
+		hash string
+		// meta is the parsed metadata; it is only set when err is nil.
+		meta *metadata.Metadata
+		// err is the reason the file could not be analyzed, if any.
+		err error
+		// mtime is the DateTimeOriginal value, used as mtime of the copy.
+		mtime time.Time
+	}
+)
+
+func main() {
+	flag.Parse()
+
+	if *directory == "" {
+		log.Fatalf("no directory given")
+	}
+	// A negative count would start no workers and block the walk forever.
+	if *workers <= 0 {
+		log.Fatalf("workers must be at least 1")
+	}
+	if !*dryRun && *target == "" {
+		log.Fatalf("dry run is disabled but no target directory given")
+	}
+
+	input := make(chan string)
+	output := make(chan picture)
+
+	// workerWG tracks the analyzers so output is closed only after the last
+	// of them has sent its final result.
+	workerWG := &sync.WaitGroup{}
+	for i := 0; i < *workers; i++ {
+		workerWG.Add(1)
+		go func() {
+			defer workerWG.Done()
+			workPictures(input, output)
+		}()
+	}
+
+	stats := map[string]int{
+		"created":   0,
+		"duplicate": 0,
+		"error":     0,
+	}
+	// targets maps the relative target path (without extension) to every
+	// unique picture that formats to that path.
+	targets := map[string][]picture{}
+	// pictures maps a content hash to the first picture seen with it; the
+	// same picture always has the same hash, which is how duplicates are found.
+	pictures := map[string]picture{}
+
+	// The collector goroutine is the only writer of the three maps above.
+	// collected is closed once it has processed the last result, so main
+	// never reads the maps while they are still being modified.
+	collected := make(chan struct{})
+	go func() {
+		defer close(collected)
+		for pic := range output {
+			if pic.err != nil {
+				log.Printf("could not handle %s: %s", pic.path, pic.err)
+				stats["error"]++
+				continue
+			}
+			if pic.hash == "" {
+				log.Printf("no hash for %s", pic.path)
+				stats["error"]++
+				continue
+			}
+			if org, found := pictures[pic.hash]; found {
+				log.Printf("%s is duplicate of %s", pic.path, org.path)
+				stats["duplicate"]++
+				continue
+			}
+			pictures[pic.hash] = pic
+			stats["created"]++
+
+			pic.mtime = pic.meta.DateTimeOriginal.Time
+			p := pic.meta.DateTimeOriginal.Format("2006/01/20060102_150405")
+			targets[p] = append(targets[p], pic)
+		}
+	}()
+
+	walkErr := filepath.Walk(*directory, func(name string, info os.FileInfo, err error) error {
+		if err != nil {
+			// info may be nil here. Report the entry like any other
+			// unreadable file and keep walking the rest of the tree.
+			output <- picture{path: name, err: err}
+			return nil
+		}
+		if info.IsDir() {
+			return nil
+		}
+		input <- name
+		return nil
+	})
+	if walkErr != nil {
+		log.Fatalf("could not walk over directory: %s", walkErr)
+	}
+
+	// Shut the pipeline down stage by stage: no more paths, wait for the
+	// workers to drain, no more results, wait for the collector to drain.
+	close(input)
+	workerWG.Wait()
+	close(output)
+	<-collected
+
+	// Fixed order, map iteration would shuffle the lines between runs.
+	for _, key := range []string{"created", "duplicate", "error"} {
+		fmt.Printf("%s: %d\n", key, stats[key])
+	}
+
+	if *dryRun {
+		return
+	}
+	log.Printf("copying files")
+	for name, sources := range targets {
+		for i, source := range sources {
+			// Pictures sharing a timestamp get an index to keep them apart.
+			suffix := ""
+			if len(sources) > 1 {
+				suffix = fmt.Sprintf("_%d", i)
+			}
+			newpath := filepath.Join(*target, name+suffix+filepath.Ext(source.path))
+			if err := copyFile(source.path, newpath, source.mtime); err != nil {
+				log.Fatalf("could not copy file from %s to %s: %s", source.path, newpath, err)
+			}
+		}
+	}
+}
+
+// workPictures analyzes every path received on input and sends one result per
+// path on output. It returns once input is closed and drained.
+func workPictures(input chan string, output chan picture) {
+	for pic := range input {
+		getPicture(pic, output)
+	}
+}
+
+// getPicture analyzes the file at path and sends exactly one picture on
+// output. Failures are reported through the err field of that picture.
+func getPicture(path string, output chan picture) {
+	output <- analyzePicture(path)
+}
+
+// analyzePicture hashes the file at name and parses its metadata. The file is
+// closed before the result is returned, so no handle is held while the caller
+// waits to hand the result over.
+func analyzePicture(name string) picture {
+	p := picture{path: name}
+
+	f, err := os.Open(name)
+	if err != nil {
+		p.err = err
+		return p
+	}
+	defer f.Close()
+
+	sum := sha512.New()
+	if _, err := io.Copy(sum, f); err != nil {
+		p.err = fmt.Errorf("could not get hash sum: %w", err)
+		return p
+	}
+	p.hash = base64.StdEncoding.EncodeToString(sum.Sum(nil))
+
+	// reset file position for metadata
+	if _, err := f.Seek(0, io.SeekStart); err != nil {
+		p.err = fmt.Errorf("could not seek in file: %w", err)
+		return p
+	}
+
+	meta, err := metadata.Parse(f)
+	if err != nil {
+		p.err = fmt.Errorf("could not parse metadata: %w", err)
+		return p
+	}
+	p.meta = meta
+
+	return p
+}
+
+// copyFile copies source to target, creating missing parent directories, and
+// sets the access and modification time of target to mTime.
+func copyFile(source, target string, mTime time.Time) error {
+	if err := os.MkdirAll(filepath.Dir(target), os.FileMode(0755)); err != nil {
+		return fmt.Errorf("could not create directory: %w", err)
+	}
+	sf, err := os.Open(source)
+	if err != nil {
+		return fmt.Errorf("could not open source file %s: %w", source, err)
+	}
+	defer sf.Close()
+	tf, err := os.Create(target)
+	if err != nil {
+		return fmt.Errorf("could not open target file %s: %w", target, err)
+	}
+	if _, err := io.Copy(tf, sf); err != nil {
+		// Best effort, the copy error is the one worth reporting.
+		_ = tf.Close()
+		return fmt.Errorf("could not copy source %s to target %s: %w", source, target, err)
+	}
+	// Close explicitly so a failed close is reported, and set the times
+	// only afterwards so nothing written later can update them again.
+	if err := tf.Close(); err != nil {
+		return fmt.Errorf("could not close target file %s: %w", target, err)
+	}
+	if err := os.Chtimes(target, mTime, mTime); err != nil {
+		return fmt.Errorf("could not set mtime of target file '%s': %w", target, err)
+	}
+	return nil
+}