199 lines
4.6 KiB
Go
199 lines
4.6 KiB
Go
|
package main
|
||
|
|
||
|
import (
|
||
|
"crypto/sha512"
|
||
|
"encoding/base64"
|
||
|
"flag"
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"log"
|
||
|
"os"
|
||
|
"path"
|
||
|
"time"
|
||
|
"path/filepath"
|
||
|
"sync"
|
||
|
|
||
|
"github.com/tajtiattila/metadata"
|
||
|
)
|
||
|
|
||
|
// Command-line flags controlling where pictures are read from, where they
// are written to, and how the work is carried out.
var (
	// directory is the root of the tree that is scanned for pictures.
	directory = flag.String("directory", "", "path of the source directory")

	// workers is the number of goroutines that hash and parse files.
	workers = flag.Int("workers", 10, "number of workers to analyze and copy files")

	// dryRun, enabled by default, restricts the run to analysis and statistics.
	dryRun = flag.Bool("dry-run", true, "don't create directories or files")

	// target is the root directory the renamed copies are written below.
	target = flag.String("target", "", "path to the target directory")
)
|
||
|
|
||
|
type (
|
||
|
picture struct {
|
||
|
path string
|
||
|
hash string
|
||
|
meta *metadata.Metadata
|
||
|
err error
|
||
|
mtime time.Time
|
||
|
}
|
||
|
)
|
||
|
|
||
|
func main() {
|
||
|
flag.Parse()
|
||
|
|
||
|
if *directory == "" {
|
||
|
log.Fatalf("no directory given")
|
||
|
}
|
||
|
if *workers == 0 {
|
||
|
log.Fatalf("workers should not be set to 0")
|
||
|
}
|
||
|
if !*dryRun && *target == "" {
|
||
|
log.Fatalf("dry run is disabled but no target directory given")
|
||
|
}
|
||
|
|
||
|
wg := &sync.WaitGroup{}
|
||
|
input := make(chan string)
|
||
|
output := make(chan picture)
|
||
|
for i := 0; i < *workers; i++ {
|
||
|
go workPictures(input, output)
|
||
|
}
|
||
|
stats := map[string]int{
|
||
|
"created": 0,
|
||
|
"duplicate": 0,
|
||
|
"error": 0,
|
||
|
}
|
||
|
targets := map[string][]picture{}
|
||
|
pictures := map[string]picture{}
|
||
|
go func() {
|
||
|
// map of hash to picture
|
||
|
// same pictures should have the same hash, so all should work out
|
||
|
for pic := range output {
|
||
|
wg.Done()
|
||
|
if pic.err != nil {
|
||
|
log.Printf("could not handle %s: %s", pic.path, pic.err)
|
||
|
stats["error"] += 1
|
||
|
continue
|
||
|
}
|
||
|
if pic.hash == "" {
|
||
|
log.Printf("no hash for %s", pic.path)
|
||
|
stats["error"] += 1
|
||
|
continue
|
||
|
}
|
||
|
if org, found := pictures[pic.hash]; !found {
|
||
|
pictures[pic.hash] = pic
|
||
|
stats["created"] += 1
|
||
|
} else {
|
||
|
log.Printf("%s is duplicate of %s", pic.path, org.path)
|
||
|
stats["duplicate"] += 1
|
||
|
continue
|
||
|
}
|
||
|
pic.mtime = pic.meta.DateTimeOriginal.Time
|
||
|
p := pic.meta.DateTimeOriginal.Format("2006/01/20060102_150405")
|
||
|
if _, found := targets[p]; !found {
|
||
|
targets[p] = []picture{pic}
|
||
|
} else {
|
||
|
targets[p] = append(targets[p], pic)
|
||
|
}
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
if err := filepath.Walk(*directory, func(path string, info os.FileInfo, err error) error {
|
||
|
if info.IsDir() {
|
||
|
return nil
|
||
|
}
|
||
|
wg.Add(1)
|
||
|
input <- path
|
||
|
return nil
|
||
|
}); err != nil {
|
||
|
log.Fatalf("could not walk over directory: %s", err)
|
||
|
}
|
||
|
|
||
|
wg.Wait()
|
||
|
close(input)
|
||
|
close(output)
|
||
|
|
||
|
for key, stat := range stats {
|
||
|
fmt.Printf("%s: %d\n", key, stat)
|
||
|
}
|
||
|
|
||
|
if *dryRun {
|
||
|
return
|
||
|
}
|
||
|
log.Printf("copying files")
|
||
|
for name, sources := range targets {
|
||
|
if len(sources) == 1 {
|
||
|
newpath := path.Join(*target, fmt.Sprintf("%s%s", name, path.Ext(sources[0].path)))
|
||
|
if err := copyFile(sources[0].path, newpath, sources[0].mtime); err != nil {
|
||
|
log.Fatalf("could not copy file from %s to %s: %s", sources[0], newpath, err)
|
||
|
}
|
||
|
} else {
|
||
|
for i, source := range sources {
|
||
|
newpath := path.Join(*target, fmt.Sprintf("%s_%0d%s", name, i, path.Ext(sources[0].path)))
|
||
|
if err := copyFile(source.path, newpath, source.mtime); err != nil {
|
||
|
log.Fatalf("could not copy file from %s to %s: %s", sources[0].path, newpath, err)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func workPictures(input chan string, output chan picture) {
|
||
|
for pic := range input {
|
||
|
getPicture(pic, output)
|
||
|
}
|
||
|
}
|
||
|
func getPicture(path string, output chan picture) {
|
||
|
p := picture{path: path}
|
||
|
|
||
|
f, err := os.Open(path)
|
||
|
if err != nil {
|
||
|
p.err = err
|
||
|
output <- p
|
||
|
return
|
||
|
}
|
||
|
defer f.Close()
|
||
|
|
||
|
sum := sha512.New()
|
||
|
if _, err := io.Copy(sum, f); err != nil {
|
||
|
p.err = fmt.Errorf("could not get hash sum: %w", err)
|
||
|
output <- p
|
||
|
return
|
||
|
}
|
||
|
p.hash = base64.StdEncoding.EncodeToString(sum.Sum(nil))
|
||
|
|
||
|
// reset file position for metadata
|
||
|
if _, err := f.Seek(0, 0); err != nil {
|
||
|
p.err = fmt.Errorf("could not seek in file: %w", err)
|
||
|
output <- p
|
||
|
return
|
||
|
}
|
||
|
|
||
|
meta, err := metadata.Parse(f)
|
||
|
if err != nil {
|
||
|
p.err = fmt.Errorf("could not parse metadata: %w", err)
|
||
|
output <- p
|
||
|
return
|
||
|
}
|
||
|
p.meta = meta
|
||
|
|
||
|
output <- p
|
||
|
}
|
||
|
|
||
|
// copyFile copies the file at source to target, creating target's parent
// directories as needed, and then sets both the access and modification time
// of target to mTime. An existing target is truncated and overwritten.
//
// It returns a wrapped error naming the step that failed: creating the
// directory, opening either file, copying, closing the target, or setting
// the timestamps.
func copyFile(source, target string, mTime time.Time) error {
	if err := os.MkdirAll(path.Dir(target), os.FileMode(0755)); err != nil {
		return fmt.Errorf("could not create directory: %w", err)
	}

	sf, err := os.Open(source)
	if err != nil {
		return fmt.Errorf("could not open source file %s: %w", source, err)
	}
	defer sf.Close()

	tf, err := os.Create(target)
	if err != nil {
		return fmt.Errorf("could not open target file %s: %w", target, err)
	}

	if _, err := io.Copy(tf, sf); err != nil {
		// The copy error is the one worth reporting; the close is only cleanup.
		_ = tf.Close()
		return fmt.Errorf("could not copy source %s to target %s: %w", source, target, err)
	}

	// Close explicitly: a failed close can mean the data never reached the
	// disk, and the timestamps must be set after the last write.
	if err := tf.Close(); err != nil {
		return fmt.Errorf("could not close target file %s: %w", target, err)
	}

	if err := os.Chtimes(target, mTime, mTime); err != nil {
		return fmt.Errorf("could not set mtime of target file '%s': %w", target, err)
	}
	return nil
}
|