| Makoto Onuki | 620e6ad | 2017-03-16 09:40:08 -0700 | [diff] [blame] | 1 | // Copyright 2017 Google Inc. All rights reserved. | 
|  | 2 | // | 
|  | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 4 | // you may not use this file except in compliance with the License. | 
|  | 5 | // You may obtain a copy of the License at | 
|  | 6 | // | 
|  | 7 | //     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 8 | // | 
|  | 9 | // Unless required by applicable law or agreed to in writing, software | 
|  | 10 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 12 | // See the License for the specific language governing permissions and | 
|  | 13 | // limitations under the License. | 
|  | 14 |  | 
// fileslist.py replacement written in Go, which utilizes multiple cores.
|  | 16 |  | 
|  | 17 | package main | 
|  | 18 |  | 
|  | 19 | import ( | 
|  | 20 | "crypto/sha256" | 
|  | 21 | "encoding/json" | 
|  | 22 | "flag" | 
|  | 23 | "fmt" | 
|  | 24 | "io" | 
|  | 25 | "os" | 
|  | 26 | "path/filepath" | 
|  | 27 | "runtime" | 
|  | 28 | "sort" | 
|  | 29 | "strings" | 
|  | 30 | "sync" | 
|  | 31 | ) | 
|  | 32 |  | 
// MAX_DEFAULT_PARA is the upper bound applied to the default number of
// goroutines computed by defaultPara.
const MAX_DEFAULT_PARA = 24

// defaultPara returns the number of logical CPUs reported by the runtime,
// capped at MAX_DEFAULT_PARA.
func defaultPara() int {
	if cpus := runtime.NumCPU(); cpus <= MAX_DEFAULT_PARA {
		return cpus
	}
	return MAX_DEFAULT_PARA
}
|  | 44 |  | 
|  | 45 | var ( | 
|  | 46 | para = flag.Int("para", defaultPara(), "Number of goroutines") | 
|  | 47 | ) | 
|  | 48 |  | 
// Node describes a single file system entry. SHA256 and Size start out as
// zero values and are filled in by scan.
type Node struct {
	SHA256 string      // Lower-case hex SHA-256 digest computed by scan.
	Name   string      // Device side path.
	Size   int64       // Size reported by stat, copied in by scan.
	path   string      // Host side path.
	stat   os.FileInfo // File info supplied by the caller of newNode.
}

// newNode creates a Node for the entry at hostPath that is reported under
// devicePath. The digest and size are left unset until scan runs.
func newNode(hostPath string, devicePath string, stat os.FileInfo) Node {
	var n Node
	n.Name = devicePath
	n.path = hostPath
	n.stat = stat
	return n
}

// scan records the size and the SHA-256 digest of the node. It returns true
// if the node should be added to the result, which is currently always the
// case. Any I/O failure panics.
func (n *Node) scan() bool {
	n.Size = n.stat.Size()

	digest := sha256.New()
	if n.stat.Mode()&os.ModeSymlink != 0 {
		// For a symlink, hash the link target string itself rather than the
		// content of whatever it points to.
		target, err := os.Readlink(n.path)
		if err != nil {
			panic(err)
		}
		if _, err := io.WriteString(digest, target); err != nil {
			panic(err)
		}
	} else {
		// NOTE(review): every non-symlink entry is opened and read like a
		// regular file, including FIFOs or device nodes — confirm callers
		// never pass such entries.
		file, err := os.Open(n.path)
		if err != nil {
			panic(err)
		}
		defer file.Close()

		if _, err := io.Copy(digest, file); err != nil {
			panic(err)
		}
	}

	n.SHA256 = fmt.Sprintf("%x", digest.Sum(nil))
	return true
}
|  | 91 |  | 
|  | 92 | func main() { | 
|  | 93 | flag.Parse() | 
|  | 94 |  | 
|  | 95 | allOutput := make([]Node, 0, 1024) // Store all outputs. | 
|  | 96 | mutex := &sync.Mutex{}             // Guard allOutput | 
|  | 97 |  | 
|  | 98 | ch := make(chan Node) // Pass nodes to goroutines. | 
|  | 99 |  | 
|  | 100 | var wg sync.WaitGroup // To wait for all goroutines. | 
|  | 101 | wg.Add(*para) | 
|  | 102 |  | 
|  | 103 | // Scan files in multiple goroutines. | 
|  | 104 | for i := 0; i < *para; i++ { | 
|  | 105 | go func() { | 
|  | 106 | defer wg.Done() | 
|  | 107 |  | 
|  | 108 | output := make([]Node, 0, 1024) // Local output list. | 
|  | 109 | for node := range ch { | 
|  | 110 | if node.scan() { | 
|  | 111 | output = append(output, node) | 
|  | 112 | } | 
|  | 113 | } | 
|  | 114 | // Add to the global output list. | 
|  | 115 | mutex.Lock() | 
|  | 116 | allOutput = append(allOutput, output...) | 
|  | 117 | mutex.Unlock() | 
|  | 118 | }() | 
|  | 119 | } | 
|  | 120 |  | 
|  | 121 | // Walk the directories and find files to scan. | 
|  | 122 | for _, dir := range flag.Args() { | 
|  | 123 | absDir, err := filepath.Abs(dir) | 
|  | 124 | if err != nil { | 
|  | 125 | panic(err) | 
|  | 126 | } | 
|  | 127 | deviceRoot := filepath.Clean(absDir + "/..") | 
|  | 128 | err = filepath.Walk(dir, func(path string, stat os.FileInfo, err error) error { | 
|  | 129 | if err != nil { | 
|  | 130 | panic(err) | 
|  | 131 | } | 
|  | 132 | if stat.IsDir() { | 
|  | 133 | return nil | 
|  | 134 | } | 
|  | 135 | absPath, err := filepath.Abs(path) | 
|  | 136 | if err != nil { | 
|  | 137 | panic(err) | 
|  | 138 | } | 
|  | 139 | devicePath, err := filepath.Rel(deviceRoot, absPath) | 
|  | 140 | if err != nil { | 
|  | 141 | panic(err) | 
|  | 142 | } | 
|  | 143 | devicePath = "/" + devicePath | 
|  | 144 | ch <- newNode(absPath, devicePath, stat) | 
|  | 145 | return nil | 
|  | 146 | }) | 
|  | 147 | if err != nil { | 
|  | 148 | panic(err) | 
|  | 149 | } | 
|  | 150 | } | 
|  | 151 |  | 
|  | 152 | // Wait until all the goroutines finish. | 
|  | 153 | close(ch) | 
|  | 154 | wg.Wait() | 
|  | 155 |  | 
|  | 156 | // Sort the entries and dump as json. | 
|  | 157 | sort.Slice(allOutput, func(i, j int) bool { | 
|  | 158 | if allOutput[i].Size > allOutput[j].Size { | 
|  | 159 | return true | 
|  | 160 | } | 
|  | 161 | if allOutput[i].Size == allOutput[j].Size && strings.Compare(allOutput[i].Name, allOutput[j].Name) > 0 { | 
|  | 162 | return true | 
|  | 163 | } | 
|  | 164 | return false | 
|  | 165 | }) | 
|  | 166 |  | 
|  | 167 | j, err := json.MarshalIndent(allOutput, "", "  ") | 
|  | 168 | if err != nil { | 
|  | 169 | panic(nil) | 
|  | 170 | } | 
|  | 171 |  | 
|  | 172 | fmt.Printf("%s\n", j) | 
|  | 173 | } |