You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
198 lines
4.6 KiB
198 lines
4.6 KiB
package main |
|
|
|
import ( |
|
"crypto/sha512" |
|
"encoding/base64" |
|
"flag" |
|
"fmt" |
|
"io" |
|
"log" |
|
"os" |
|
"path" |
|
"time" |
|
"path/filepath" |
|
"sync" |
|
|
|
"github.com/tajtiattila/metadata" |
|
) |
|
|
|
// Command-line flags. The two locations come first, followed by the knobs
// that control how the work is carried out.
var (
	// directory is the root of the tree that is walked for input files.
	directory = flag.String("directory", "", "path of the source directory")
	// target is where copies are written; required once dry-run is disabled.
	target = flag.String("target", "", "path to the target directory")

	// workers is the number of goroutines analyzing files concurrently.
	workers = flag.Int("workers", 10, "number of workers to analyze and copy files")
	// dryRun defaults to true, so nothing is written unless it is turned off.
	dryRun = flag.Bool("dry-run", true, "don't create directories or files")
)
|
|
|
type ( |
|
picture struct { |
|
path string |
|
hash string |
|
meta *metadata.Metadata |
|
err error |
|
mtime time.Time |
|
} |
|
) |
|
|
|
func main() { |
|
flag.Parse() |
|
|
|
if *directory == "" { |
|
log.Fatalf("no directory given") |
|
} |
|
if *workers == 0 { |
|
log.Fatalf("workers should not be set to 0") |
|
} |
|
if !*dryRun && *target == "" { |
|
log.Fatalf("dry run is disabled but no target directory given") |
|
} |
|
|
|
wg := &sync.WaitGroup{} |
|
input := make(chan string) |
|
output := make(chan picture) |
|
for i := 0; i < *workers; i++ { |
|
go workPictures(input, output) |
|
} |
|
stats := map[string]int{ |
|
"created": 0, |
|
"duplicate": 0, |
|
"error": 0, |
|
} |
|
targets := map[string][]picture{} |
|
pictures := map[string]picture{} |
|
go func() { |
|
// map of hash to picture |
|
// same pictures should have the same hash, so all should work out |
|
for pic := range output { |
|
wg.Done() |
|
if pic.err != nil { |
|
log.Printf("could not handle %s: %s", pic.path, pic.err) |
|
stats["error"] += 1 |
|
continue |
|
} |
|
if pic.hash == "" { |
|
log.Printf("no hash for %s", pic.path) |
|
stats["error"] += 1 |
|
continue |
|
} |
|
if org, found := pictures[pic.hash]; !found { |
|
pictures[pic.hash] = pic |
|
stats["created"] += 1 |
|
} else { |
|
log.Printf("%s is duplicate of %s", pic.path, org.path) |
|
stats["duplicate"] += 1 |
|
continue |
|
} |
|
pic.mtime = pic.meta.DateTimeOriginal.Time |
|
p := pic.meta.DateTimeOriginal.Format("2006/01/20060102_150405") |
|
if _, found := targets[p]; !found { |
|
targets[p] = []picture{pic} |
|
} else { |
|
targets[p] = append(targets[p], pic) |
|
} |
|
} |
|
}() |
|
|
|
if err := filepath.Walk(*directory, func(path string, info os.FileInfo, err error) error { |
|
if info.IsDir() { |
|
return nil |
|
} |
|
wg.Add(1) |
|
input <- path |
|
return nil |
|
}); err != nil { |
|
log.Fatalf("could not walk over directory: %s", err) |
|
} |
|
|
|
wg.Wait() |
|
close(input) |
|
close(output) |
|
|
|
for key, stat := range stats { |
|
fmt.Printf("%s: %d\n", key, stat) |
|
} |
|
|
|
if *dryRun { |
|
return |
|
} |
|
log.Printf("copying files") |
|
for name, sources := range targets { |
|
if len(sources) == 1 { |
|
newpath := path.Join(*target, fmt.Sprintf("%s%s", name, path.Ext(sources[0].path))) |
|
if err := copyFile(sources[0].path, newpath, sources[0].mtime); err != nil { |
|
log.Fatalf("could not copy file from %s to %s: %s", sources[0], newpath, err) |
|
} |
|
} else { |
|
for i, source := range sources { |
|
newpath := path.Join(*target, fmt.Sprintf("%s_%0d%s", name, i, path.Ext(sources[0].path))) |
|
if err := copyFile(source.path, newpath, source.mtime); err != nil { |
|
log.Fatalf("could not copy file from %s to %s: %s", sources[0].path, newpath, err) |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
func workPictures(input chan string, output chan picture) { |
|
for pic := range input { |
|
getPicture(pic, output) |
|
} |
|
} |
|
func getPicture(path string, output chan picture) { |
|
p := picture{path: path} |
|
|
|
f, err := os.Open(path) |
|
if err != nil { |
|
p.err = err |
|
output <- p |
|
return |
|
} |
|
defer f.Close() |
|
|
|
sum := sha512.New() |
|
if _, err := io.Copy(sum, f); err != nil { |
|
p.err = fmt.Errorf("could not get hash sum: %w", err) |
|
output <- p |
|
return |
|
} |
|
p.hash = base64.StdEncoding.EncodeToString(sum.Sum(nil)) |
|
|
|
// reset file position for metadata |
|
if _, err := f.Seek(0, 0); err != nil { |
|
p.err = fmt.Errorf("could not seek in file: %w", err) |
|
output <- p |
|
return |
|
} |
|
|
|
meta, err := metadata.Parse(f) |
|
if err != nil { |
|
p.err = fmt.Errorf("could not parse metadata: %w", err) |
|
output <- p |
|
return |
|
} |
|
p.meta = meta |
|
|
|
output <- p |
|
} |
|
|
|
// copyFile copies the contents of source to target and then sets the access
// and modification time of target to mTime. Missing parent directories of
// target are created with mode 0755; an existing target is truncated.
//
// The returned error wraps the underlying cause, so callers can inspect it
// with errors.Is / errors.As.
func copyFile(source, target string, mTime time.Time) error {
	if err := os.MkdirAll(filepath.Dir(target), os.FileMode(0755)); err != nil {
		return fmt.Errorf("could not create directory: %w", err)
	}

	sf, err := os.Open(source)
	if err != nil {
		return fmt.Errorf("could not open source file %s: %w", source, err)
	}
	defer sf.Close()

	tf, err := os.Create(target)
	if err != nil {
		return fmt.Errorf("could not open target file %s: %w", target, err)
	}
	if _, err := io.Copy(tf, sf); err != nil {
		// Best effort only: the copy failure is the error worth reporting.
		tf.Close()
		return fmt.Errorf("could not copy source %s to target %s: %w", source, target, err)
	}
	// Close explicitly instead of deferring: a failing Close can mean the
	// data never reached the disk, and the timestamps should only be set
	// once nothing writes to the file any more.
	if err := tf.Close(); err != nil {
		return fmt.Errorf("could not close target file %s: %w", target, err)
	}

	if err := os.Chtimes(target, mTime, mTime); err != nil {
		return fmt.Errorf("could not set mtime of target file '%s': %w", target, err)
	}
	return nil
}
|
|
|