picsort/main.go

199 lines
4.6 KiB
Go
Raw Permalink Normal View History

2021-03-20 22:09:42 +01:00
package main
import (
"crypto/sha512"
"encoding/base64"
"flag"
"fmt"
"io"
"log"
"os"
"path"
"time"
"path/filepath"
"sync"
"github.com/tajtiattila/metadata"
)
// directory is the root of the tree that is scanned for pictures.
var directory = flag.String("directory", "", "path of the source directory")

// workers is the number of goroutines started to analyze files.
var workers = flag.Int("workers", 10, "number of workers to analyze and copy files")

// dryRun is enabled by default; while it is set, main only prints the
// statistics and returns before any file is copied.
var dryRun = flag.Bool("dry-run", true, "don't create directories or files")

// target is the directory below which the sorted pictures are written.
var target = flag.String("target", "", "path to the target directory")
// picture is the result of analyzing a single file.
type picture struct {
	// path is the location of the analyzed file.
	path string
	// hash is the base64 encoded SHA-512 sum of the file content.
	hash string
	// meta holds the metadata parsed from the file.
	meta *metadata.Metadata
	// err is set when the file could not be analyzed.
	err error
	// mtime is the modification time given to the copied file.
	mtime time.Time
}
// main parses the command line flags, lets a pool of workers hash and inspect
// every file below -directory, prints statistics and, unless -dry-run is set,
// copies every unique picture below -target.
//
// Target names are built from the DateTimeOriginal metadata with the layout
// "2006/01/20060102_150405". Pictures sharing the same timestamp get an
// additional "_<index>" suffix so that they do not overwrite each other.
func main() {
	flag.Parse()
	if *directory == "" {
		log.Fatalf("no directory given")
	}
	// A non-positive count would start no worker at all and the walk below
	// would block forever on its first send.
	if *workers <= 0 {
		log.Fatalf("workers should be greater than 0")
	}
	if !*dryRun && *target == "" {
		log.Fatalf("dry run is disabled but no target directory given")
	}

	wg := &sync.WaitGroup{}
	input := make(chan string)
	output := make(chan picture)
	for i := 0; i < *workers; i++ {
		go workPictures(input, output)
	}

	stats := map[string]int{
		"created":   0,
		"duplicate": 0,
		"error":     0,
	}
	targets := map[string][]picture{}
	// map of hash to picture
	// same pictures should have the same hash, so all should work out
	pictures := map[string]picture{}

	// collect records a single result. wg.Done is deferred so that wg.Wait
	// only returns after the maps were updated; signalling earlier would let
	// main read them while they are still being written.
	collect := func(pic picture) {
		defer wg.Done()
		if pic.err != nil {
			log.Printf("could not handle %s: %s", pic.path, pic.err)
			stats["error"] += 1
			return
		}
		if pic.hash == "" {
			log.Printf("no hash for %s", pic.path)
			stats["error"] += 1
			return
		}
		if org, found := pictures[pic.hash]; found {
			log.Printf("%s is duplicate of %s", pic.path, org.path)
			stats["duplicate"] += 1
			return
		}
		pictures[pic.hash] = pic
		stats["created"] += 1

		pic.mtime = pic.meta.DateTimeOriginal.Time
		p := pic.meta.DateTimeOriginal.Format("2006/01/20060102_150405")
		targets[p] = append(targets[p], pic)
	}
	go func() {
		for pic := range output {
			collect(pic)
		}
	}()

	if err := filepath.Walk(*directory, func(name string, info os.FileInfo, err error) error {
		// info is nil when err is set, so the error has to be checked first.
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		wg.Add(1)
		input <- name
		return nil
	}); err != nil {
		log.Fatalf("could not walk over directory: %s", err)
	}
	wg.Wait()
	close(input)
	close(output)

	// Fixed order, map iteration would shuffle the lines between runs.
	for _, key := range []string{"created", "duplicate", "error"} {
		fmt.Printf("%s: %d\n", key, stats[key])
	}
	if *dryRun {
		return
	}

	log.Printf("copying files")
	for name, sources := range targets {
		for i, source := range sources {
			// Only disambiguate when several pictures share the timestamp.
			suffix := ""
			if len(sources) > 1 {
				suffix = fmt.Sprintf("_%d", i)
			}
			// Every picture keeps its own extension.
			newpath := path.Join(*target, name+suffix+path.Ext(source.path))
			if err := copyFile(source.path, newpath, source.mtime); err != nil {
				log.Fatalf("could not copy file from %s to %s: %s", source.path, newpath, err)
			}
		}
	}
}
// workPictures analyzes every path received on input and reports the result
// on output. It returns once input is closed.
func workPictures(input chan string, output chan picture) {
	for {
		file, open := <-input
		if !open {
			return
		}
		getPicture(file, output)
	}
}
// getPicture hashes the file at path, parses its metadata and sends exactly
// one picture on output. When a step fails the picture carries the error in
// its err field together with whatever was gathered up to that point.
func getPicture(path string, output chan picture) {
	result := picture{path: path}

	// analyze fills result step by step and stops at the first failure.
	analyze := func() error {
		file, err := os.Open(path)
		if err != nil {
			return err
		}
		defer file.Close()

		hasher := sha512.New()
		if _, err := io.Copy(hasher, file); err != nil {
			return fmt.Errorf("could not get hash sum: %w", err)
		}
		result.hash = base64.StdEncoding.EncodeToString(hasher.Sum(nil))

		// Hashing consumed the file, rewind it for the metadata parser.
		if _, err := file.Seek(0, io.SeekStart); err != nil {
			return fmt.Errorf("could not seek in file: %w", err)
		}

		parsed, err := metadata.Parse(file)
		if err != nil {
			return fmt.Errorf("could not parse metadata: %w", err)
		}
		result.meta = parsed
		return nil
	}

	result.err = analyze()
	output <- result
}
// copyFile copies the content of source to target and sets both the access
// and the modification time of target to mTime. Missing parent directories of
// target are created with mode 0755; an existing target is truncated.
//
// It returns an error wrapping the underlying cause when a directory cannot
// be created, a file cannot be opened, or copying, closing or changing the
// times fails.
func copyFile(source, target string, mTime time.Time) error {
	if err := os.MkdirAll(filepath.Dir(target), os.FileMode(0755)); err != nil {
		return fmt.Errorf("could not create directory: %w", err)
	}
	sf, err := os.Open(source)
	if err != nil {
		return fmt.Errorf("could not open source file %s: %w", source, err)
	}
	defer sf.Close()
	tf, err := os.Create(target)
	if err != nil {
		return fmt.Errorf("could not open target file %s: %w", target, err)
	}
	if _, err := io.Copy(tf, sf); err != nil {
		// The copy error is the one worth reporting, closing is best effort.
		_ = tf.Close()
		return fmt.Errorf("could not copy source %s to target %s: %w", source, target, err)
	}
	// Close explicitly: a failing close can mean that data was not written,
	// and the times are only set once no further write can touch the file.
	if err := tf.Close(); err != nil {
		return fmt.Errorf("could not close target file %s: %w", target, err)
	}
	if err := os.Chtimes(target, mTime, mTime); err != nil {
		return fmt.Errorf("could not set mtime of target file '%s': %w", target, err)
	}
	return nil
}