Add a symbols_map tool for extracting identifiers from elf and r8 files

Add a symbols_map tool that can extract an identifying hash from
an elf file or an r8 dictionary.  The tool writes the hash to a
textproto, and also supports a merge mode to combine textprotos into
an output file for inclusion in the build artifacts.

Bug: 218888599
Test: m dist
Test: symbols_map_test.go
Change-Id: Icd3ed6e5510e058c92d97c78759e7a4cfcdbb6ca
diff --git a/cmd/symbols_map/Android.bp b/cmd/symbols_map/Android.bp
new file mode 100644
index 0000000..0ba3b07
--- /dev/null
+++ b/cmd/symbols_map/Android.bp
@@ -0,0 +1,37 @@
+package {
+    default_applicable_licenses: ["Android-Apache-2.0"],
+}
+
+// symbols_map extracts identifiers from elf files and r8 dictionaries.
+blueprint_go_binary {
+    name: "symbols_map",
+    srcs: [
+        "elf.go",
+        "r8.go",
+        "symbols_map.go",
+    ],
+    testSrcs: [
+        "elf_test.go",
+        "r8_test.go",
+        "symbols_map_test.go",
+    ],
+    deps: [
+        "blueprint-pathtools",
+        "golang-protobuf-encoding-prototext",
+        "soong-response",
+        "symbols_map_proto",
+    ],
+}
+
+// symbols_map_proto is the Go package generated from symbols_map.proto.
+bootstrap_go_package {
+    name: "symbols_map_proto",
+    pkgPath: "android/soong/cmd/symbols_map/symbols_map_proto",
+    deps: [
+        "golang-protobuf-reflect-protoreflect",
+        "golang-protobuf-runtime-protoimpl",
+    ],
+    srcs: [
+        "symbols_map_proto/symbols_map.pb.go",
+    ],
+}
diff --git a/cmd/symbols_map/elf.go b/cmd/symbols_map/elf.go
new file mode 100644
index 0000000..b38896a
--- /dev/null
+++ b/cmd/symbols_map/elf.go
@@ -0,0 +1,95 @@
+// Copyright 2022 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"debug/elf"
+	"encoding/binary"
+	"encoding/hex"
+	"fmt"
+	"io"
+)
+
+const gnuBuildID = "GNU\x00" // name field of the GNU build ID note, including its trailing NUL
+
+// elfIdentifier extracts the elf build ID from an elf file as a hex string.  If allowMissing is
+// true it returns an empty identifier if the file exists but the build ID note does not.
+func elfIdentifier(filename string, allowMissing bool) (string, error) {
+	f, err := elf.Open(filename)
+	if err != nil {
+		return "", fmt.Errorf("failed to open %s: %w", filename, err)
+	}
+	defer f.Close()
+
+	buildIDNote := f.Section(".note.gnu.build-id")
+	if buildIDNote == nil {
+		if allowMissing {
+			return "", nil
+		}
+		return "", fmt.Errorf("failed to find .note.gnu.build-id in %s", filename)
+	}
+
+	buildIDs, err := readNote(buildIDNote.Open(), f.ByteOrder)
+	if err != nil {
+		return "", fmt.Errorf("failed to read .note.gnu.build-id in %s: %w", filename, err)
+	}
+
+	// The section can exist without holding a GNU note; that counts as a missing build ID.  A
+	// missing note leaves desc nil, which encodes to the empty identifier.
+	desc, ok := buildIDs[gnuBuildID]
+	if !ok && !allowMissing {
+		return "", fmt.Errorf("failed to find GNU build ID note in %s", filename)
+	}
+	return hex.EncodeToString(desc), nil
+}
+
+// readNote reads the contents of a note section, returning it as a map from name to descriptor.
+// Names keep their NUL terminator; if a name repeats, the last descriptor wins.
+func readNote(note io.Reader, byteOrder binary.ByteOrder) (map[string][]byte, error) {
+	var noteHeader struct {
+		Namesz uint32
+		Descsz uint32
+		Type   uint32
+	}
+
+	notes := make(map[string][]byte)
+	for {
+		err := binary.Read(note, byteOrder, &noteHeader)
+		if err == io.EOF {
+			return notes, nil
+		} else if err != nil {
+			return nil, fmt.Errorf("failed to read note header: %w", err)
+		}
+		nameLen, descLen := align4(noteHeader.Namesz), align4(noteHeader.Descsz)
+		if nameLen < noteHeader.Namesz || descLen < noteHeader.Descsz {
+			// Padding wrapped around uint32; slicing the buffers below would panic.
+			return nil, fmt.Errorf("invalid note sizes %d/%d", noteHeader.Namesz, noteHeader.Descsz)
+		}
+		nameBuf := make([]byte, nameLen)
+		if _, err = io.ReadFull(note, nameBuf); err != nil {
+			return nil, fmt.Errorf("failed to read note name: %w", err)
+		}
+		descBuf := make([]byte, descLen)
+		if _, err = io.ReadFull(note, descBuf); err != nil {
+			return nil, fmt.Errorf("failed to read note desc: %w", err)
+		}
+		notes[string(nameBuf[:noteHeader.Namesz])] = descBuf[:noteHeader.Descsz]
+	}
+}
+
+// align4 rounds the input up to the next multiple of 4, leaving exact multiples unchanged.
+func align4(i uint32) uint32 {
+	return (i + 3) &^ 3 // i + 3 wraps around for inputs above 0xfffffffc, yielding 0
+}
diff --git a/cmd/symbols_map/elf_test.go b/cmd/symbols_map/elf_test.go
new file mode 100644
index 0000000..e616228
--- /dev/null
+++ b/cmd/symbols_map/elf_test.go
@@ -0,0 +1,45 @@
+// Copyright 2022 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"bytes"
+	"encoding/binary"
+	"reflect"
+	"testing"
+)
+
+func Test_readNote(t *testing.T) {
+	note := []byte{ // one hand-built note: 12-byte header, name, then a 16-byte desc
+		0x04, 0x00, 0x00, 0x00, // namesz: 4 (little endian)
+		0x10, 0x00, 0x00, 0x00, // descsz: 16
+		0x03, 0x00, 0x00, 0x00, // type: 3
+		0x47, 0x4e, 0x55, 0x00, // name: "GNU\x00"
+		0xca, 0xaf, 0x44, 0xd2, 0x82, 0x78, 0x68, 0xfe, 0xc0, 0x90, 0xa3, 0x43, 0x85, 0x36, 0x6c, 0xc7,
+	}
+
+	descs, err := readNote(bytes.NewBuffer(note), binary.LittleEndian)
+	if err != nil {
+		t.Fatalf("unexpected error in readNote: %s", err)
+	}
+
+	expectedDescs := map[string][]byte{
+		"GNU\x00": []byte{0xca, 0xaf, 0x44, 0xd2, 0x82, 0x78, 0x68, 0xfe, 0xc0, 0x90, 0xa3, 0x43, 0x85, 0x36, 0x6c, 0xc7},
+	}
+
+	if !reflect.DeepEqual(descs, expectedDescs) {
+		t.Errorf("incorrect return, want %#v got %#v", expectedDescs, descs)
+	}
+}
diff --git a/cmd/symbols_map/r8.go b/cmd/symbols_map/r8.go
new file mode 100644
index 0000000..6f73e09
--- /dev/null
+++ b/cmd/symbols_map/r8.go
@@ -0,0 +1,56 @@
+// Copyright 2022 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+)
+
+const hashPrefix = "# pg_map_hash: "
+const hashTypePrefix = "SHA-256 " // the only hash type extractR8CompilerHash accepts after hashPrefix
+const commentPrefix = "#"
+
+// r8Identifier opens the R8 dictionary at filename and returns the hash from its pg_map_hash
+// comment, or an empty identifier if no such comment precedes the first non-comment line.
+func r8Identifier(filename string) (string, error) {
+	f, err := os.Open(filename)
+	if err != nil {
+		return "", fmt.Errorf("failed to open %s: %w", filename, err)
+	}
+	defer f.Close()
+
+	return extractR8CompilerHash(f)
+}
+
+func extractR8CompilerHash(r io.Reader) (string, error) { // returns the pg_map_hash value, or ""
+	s := bufio.NewScanner(r)
+	for s.Scan() {
+		line := s.Text()
+		if strings.HasPrefix(line, hashPrefix) {
+			hash := strings.TrimPrefix(line, hashPrefix)
+			if !strings.HasPrefix(hash, hashTypePrefix) {
+				return "", fmt.Errorf("invalid hash type found in %q", line)
+			}
+			return strings.TrimPrefix(hash, hashTypePrefix), nil
+		} else if !strings.HasPrefix(line, commentPrefix) {
+			break // the first non-comment line ends the search
+		}
+	}
+	return "", s.Err() // nil at a clean end of input or after break; otherwise the scan error
+}
diff --git a/cmd/symbols_map/r8_test.go b/cmd/symbols_map/r8_test.go
new file mode 100644
index 0000000..5712da9
--- /dev/null
+++ b/cmd/symbols_map/r8_test.go
@@ -0,0 +1,91 @@
+// Copyright 2022 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+)
+
+func Test_extractR8CompilerHash(t *testing.T) {
+	testCases := []struct {
+		name string
+		data string
+
+		hash string // expected return value
+		err  string // substring expected in the returned error; empty means no error expected
+	}{
+		{
+			name: "simple",
+			data: `# compiler: R8
+# compiler_version: 3.3.18-dev
+# min_api: 10000
+# compiler_hash: bab44c1a04a2201b55fe10394f477994205c34e0
+# common_typos_disable
+# {"id":"com.android.tools.r8.mapping","version":"2.0"}
+# pg_map_id: 7fe8b95
+# pg_map_hash: SHA-256 7fe8b95ae71f179f63d2a585356fb9cf2c8fb94df9c9dd50621ffa6d9e9e88da
+android.car.userlib.UserHelper -> android.car.userlib.UserHelper:
+`,
+			hash: "7fe8b95ae71f179f63d2a585356fb9cf2c8fb94df9c9dd50621ffa6d9e9e88da",
+		},
+		{
+			name: "empty",
+			data: ``,
+			hash: "",
+		},
+		{
+			name: "non comment line", // a hash comment after the first non-comment line is ignored
+			data: `# compiler: R8
+# compiler_version: 3.3.18-dev
+# min_api: 10000
+# compiler_hash: bab44c1a04a2201b55fe10394f477994205c34e0
+# common_typos_disable
+# {"id":"com.android.tools.r8.mapping","version":"2.0"}
+# pg_map_id: 7fe8b95
+android.car.userlib.UserHelper -> android.car.userlib.UserHelper:
+# pg_map_hash: SHA-256 7fe8b95ae71f179f63d2a585356fb9cf2c8fb94df9c9dd50621ffa6d9e9e88da
+`,
+			hash: "",
+		},
+		{
+			name: "invalid hash", // hash type other than SHA-256
+			data: `# pg_map_hash: foobar 7fe8b95ae71f179f63d2a585356fb9cf2c8fb94df9c9dd50621ffa6d9e9e88da`,
+			err:  "invalid hash type",
+		},
+	}
+
+	for _, tt := range testCases {
+		t.Run(tt.name, func(t *testing.T) {
+			hash, err := extractR8CompilerHash(bytes.NewBufferString(tt.data))
+			if err != nil {
+				if tt.err != "" {
+					if !strings.Contains(err.Error(), tt.err) {
+						t.Fatalf("incorrect error in extractR8CompilerHash, want %s got %s", tt.err, err)
+					}
+				} else {
+					t.Fatalf("unexpected error in extractR8CompilerHash: %s", err)
+				}
+			} else if tt.err != "" {
+				t.Fatalf("missing error in extractR8CompilerHash, want %s", tt.err)
+			}
+
+			if g, w := hash, tt.hash; g != w {
+				t.Errorf("incorrect hash, want %q got %q", w, g)
+			}
+		})
+	}
+}
diff --git a/cmd/symbols_map/symbols_map.go b/cmd/symbols_map/symbols_map.go
new file mode 100644
index 0000000..938446d
--- /dev/null
+++ b/cmd/symbols_map/symbols_map.go
@@ -0,0 +1,202 @@
+// Copyright 2022 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"strings"
+
+	"android/soong/cmd/symbols_map/symbols_map_proto"
+	"android/soong/response"
+
+	"github.com/google/blueprint/pathtools"
+	"google.golang.org/protobuf/encoding/prototext"
+	"google.golang.org/protobuf/proto"
+)
+
+// This tool is used to extract a hash from an elf file or an r8 dictionary and store it as a
+// textproto, or to merge multiple textprotos together.
+
+func main() {
+	var expandedArgs []string
+	for _, arg := range os.Args[1:] { // replace each @file argument with the response file's contents
+		if strings.HasPrefix(arg, "@") {
+			f, err := os.Open(strings.TrimPrefix(arg, "@"))
+			if err != nil {
+				fmt.Fprintln(os.Stderr, err.Error())
+				os.Exit(1)
+			}
+
+			respArgs, err := response.ReadRspFile(f)
+			f.Close()
+			if err != nil {
+				fmt.Fprintln(os.Stderr, err.Error())
+				os.Exit(1)
+			}
+			expandedArgs = append(expandedArgs, respArgs...)
+		} else {
+			expandedArgs = append(expandedArgs, arg)
+		}
+	}
+
+	flags := flag.NewFlagSet("flags", flag.ExitOnError)
+
+	// Hide the flag package to prevent accidental references to flag instead of flags.
+	flag := struct{}{}
+	_ = flag
+
+	flags.Usage = func() {
+		fmt.Fprintf(flags.Output(), "Usage of %s:\n", os.Args[0])
+		fmt.Fprintf(flags.Output(), "  %s -elf|-r8 <input file> [-write_if_changed] <output file>\n", os.Args[0])
+		fmt.Fprintf(flags.Output(), "  %s -merge <output file> [-write_if_changed] [-ignore_missing_files] [-strip_prefix <prefix>] [<input file>...]\n", os.Args[0])
+		fmt.Fprintln(flags.Output())
+
+		flags.PrintDefaults()
+	}
+
+	elfFile := flags.String("elf", "", "extract identifier from an elf file")
+	r8File := flags.String("r8", "", "extract identifier from an r8 dictionary")
+	merge := flags.String("merge", "", "merge multiple identifier protos")
+
+	writeIfChanged := flags.Bool("write_if_changed", false, "only write output file if it is modified")
+	ignoreMissingFiles := flags.Bool("ignore_missing_files", false, "ignore missing input files in merge mode")
+	stripPrefix := flags.String("strip_prefix", "", "prefix to strip off of the location field in merge mode")
+
+	flags.Parse(expandedArgs)
+
+	if *merge != "" {
+		// If merge mode was requested perform the merge and exit early.
+		err := mergeProtos(*merge, flags.Args(), *stripPrefix, *writeIfChanged, *ignoreMissingFiles)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "failed to merge protos: %s\n", err)
+			os.Exit(1)
+		}
+		os.Exit(0)
+	}
+
+	if *elfFile == "" && *r8File == "" {
+		fmt.Fprintf(os.Stderr, "-elf or -r8 argument is required\n")
+		flags.Usage()
+		os.Exit(1)
+	}
+
+	if *elfFile != "" && *r8File != "" {
+		fmt.Fprintf(os.Stderr, "only one of -elf or -r8 argument is allowed\n")
+		flags.Usage()
+		os.Exit(1)
+	}
+
+	if flags.NArg() != 1 { // extract mode takes exactly one positional argument: the output file
+		flags.Usage()
+		os.Exit(1)
+	}
+
+	output := flags.Arg(0)
+
+	var identifier string
+	var location string
+	var typ symbols_map_proto.Mapping_Type
+	var err error
+
+	if *elfFile != "" {
+		typ = symbols_map_proto.Mapping_ELF
+		location = *elfFile
+		identifier, err = elfIdentifier(*elfFile, true)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "error reading elf identifier: %s\n", err)
+			os.Exit(1)
+		}
+	} else if *r8File != "" {
+		typ = symbols_map_proto.Mapping_R8
+		location = *r8File
+		identifier, err = r8Identifier(*r8File)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "error reading r8 identifier: %s\n", err)
+			os.Exit(1)
+		}
+	} else {
+		panic("shouldn't get here")
+	}
+
+	mapping := symbols_map_proto.Mapping{
+		Identifier: proto.String(identifier),
+		Location:   proto.String(location),
+		Type:       typ.Enum(),
+	}
+
+	err = writeTextProto(output, &mapping, *writeIfChanged)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error writing output: %s\n", err)
+		os.Exit(1)
+	}
+}
+
+// writeTextProto marshals message as a multiline textproto and writes it to output.  If
+// writeIfChanged is set the write goes through pathtools.WriteFileIfChanged instead.
+func writeTextProto(output string, message proto.Message, writeIfChanged bool) error {
+	marshaller := prototext.MarshalOptions{Multiline: true}
+	data, err := marshaller.Marshal(message)
+	if err != nil {
+		return fmt.Errorf("error marshalling textproto: %w", err)
+	}
+
+	if writeIfChanged {
+		err = pathtools.WriteFileIfChanged(output, data, 0666)
+	} else {
+		err = ioutil.WriteFile(output, data, 0666)
+	}
+
+	if err != nil {
+		return fmt.Errorf("error writing to %s: %w", output, err) // no newline: callers add their own
+	}
+
+	return nil
+}
+
+// mergeProtos reads each input as a textproto Mapping, strips stripPrefix from its location,
+// and writes all of them, in input order, to output as a single textproto Mappings message.
+func mergeProtos(output string, inputs []string, stripPrefix string, writeIfChanged bool, ignoreMissingFiles bool) error {
+	mappings := symbols_map_proto.Mappings{}
+	for _, input := range inputs {
+		mapping := symbols_map_proto.Mapping{}
+		data, err := ioutil.ReadFile(input)
+		if err != nil {
+			if ignoreMissingFiles && os.IsNotExist(err) {
+				// Merge mode is used on a list of files in the packaging directory.  If multiple
+				// goals are included on the build command line, for example `dist` and `tests`,
+				// then the symbols packaging rule for `dist` can run while a dependency of `tests`
+				// is modifying the symbols packaging directory.  That can result in a file that
+				// existed when the file list was generated being deleted as part of updating it,
+				// resulting in sporadic ENOENT errors.  Ignore them if -ignore_missing_files
+				// was passed on the command line.
+				continue
+			}
+			return fmt.Errorf("failed to read %s: %w", input, err)
+		}
+		err = prototext.Unmarshal(data, &mapping)
+		if err != nil {
+			return fmt.Errorf("failed to parse textproto %s: %w", input, err)
+		}
+		if stripPrefix != "" && mapping.Location != nil {
+			mapping.Location = proto.String(strings.TrimPrefix(*mapping.Location, stripPrefix))
+		}
+		mappings.Mappings = append(mappings.Mappings, &mapping)
+	}
+
+	return writeTextProto(output, &mappings, writeIfChanged)
+}
diff --git a/cmd/symbols_map/symbols_map_proto/symbols_map.pb.go b/cmd/symbols_map/symbols_map_proto/symbols_map.pb.go
new file mode 100644
index 0000000..f9c0ce5
--- /dev/null
+++ b/cmd/symbols_map/symbols_map_proto/symbols_map.pb.go
@@ -0,0 +1,315 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by protoc-gen-go. DO NOT EDIT.
+// versions:
+// 	protoc-gen-go v1.27.1
+// 	protoc        v3.9.1
+// source: symbols_map.proto
+
+package symbols_map_proto
+
+import (
+	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
+	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
+	reflect "reflect"
+	sync "sync"
+)
+
+const (
+	// Verify that this generated code is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
+	// Verify that runtime/protoimpl is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
+)
+
+// Type is the valid types of a mapping.
+type Mapping_Type int32
+
+const (
+	// ELF denotes a mapping from an elf build ID to an unstripped elf file.
+	Mapping_ELF Mapping_Type = 0
+	// R8 denotes a mapping from an R8 dictionary hash to an R8 dictionary.
+	Mapping_R8 Mapping_Type = 1
+)
+
+// Enum value maps for Mapping_Type.
+var (
+	Mapping_Type_name = map[int32]string{
+		0: "ELF",
+		1: "R8",
+	}
+	Mapping_Type_value = map[string]int32{
+		"ELF": 0,
+		"R8":  1,
+	}
+)
+
+func (x Mapping_Type) Enum() *Mapping_Type {
+	p := new(Mapping_Type)
+	*p = x
+	return p
+}
+
+func (x Mapping_Type) String() string {
+	return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
+}
+
+func (Mapping_Type) Descriptor() protoreflect.EnumDescriptor {
+	return file_symbols_map_proto_enumTypes[0].Descriptor()
+}
+
+func (Mapping_Type) Type() protoreflect.EnumType {
+	return &file_symbols_map_proto_enumTypes[0]
+}
+
+func (x Mapping_Type) Number() protoreflect.EnumNumber {
+	return protoreflect.EnumNumber(x)
+}
+
+// Deprecated: Do not use.
+func (x *Mapping_Type) UnmarshalJSON(b []byte) error {
+	num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b)
+	if err != nil {
+		return err
+	}
+	*x = Mapping_Type(num)
+	return nil
+}
+
+// Deprecated: Use Mapping_Type.Descriptor instead.
+func (Mapping_Type) EnumDescriptor() ([]byte, []int) {
+	return file_symbols_map_proto_rawDescGZIP(), []int{0, 0}
+}
+
+type Mapping struct {
+	state         protoimpl.MessageState
+	sizeCache     protoimpl.SizeCache
+	unknownFields protoimpl.UnknownFields
+
+	// identifier is a unique identifier of a location, generally the hash of the file.  For an
+	// elf file it is the elf build ID, for an R8 dictionary it is the hash from the comments in the
+	// top of the file.  It may be empty if no hash could be extracted from the file.
+	Identifier *string `protobuf:"bytes,1,opt,name=identifier" json:"identifier,omitempty"`
+	// location is the path to the file with the given identifier.  The location should be valid
+	// both on the local disk and in the distributed symbols.zip or proguard_dict.zip files.
+	Location *string `protobuf:"bytes,2,opt,name=location" json:"location,omitempty"`
+	// type is the type of the mapping, either ELF or R8.
+	Type *Mapping_Type `protobuf:"varint,3,opt,name=type,enum=symbols_map.Mapping_Type" json:"type,omitempty"`
+}
+
+func (x *Mapping) Reset() {
+	*x = Mapping{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_symbols_map_proto_msgTypes[0]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+func (x *Mapping) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*Mapping) ProtoMessage() {}
+
+func (x *Mapping) ProtoReflect() protoreflect.Message {
+	mi := &file_symbols_map_proto_msgTypes[0]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use Mapping.ProtoReflect.Descriptor instead.
+func (*Mapping) Descriptor() ([]byte, []int) {
+	return file_symbols_map_proto_rawDescGZIP(), []int{0}
+}
+
+func (x *Mapping) GetIdentifier() string {
+	if x != nil && x.Identifier != nil {
+		return *x.Identifier
+	}
+	return ""
+}
+
+func (x *Mapping) GetLocation() string {
+	if x != nil && x.Location != nil {
+		return *x.Location
+	}
+	return ""
+}
+
+func (x *Mapping) GetType() Mapping_Type {
+	if x != nil && x.Type != nil {
+		return *x.Type
+	}
+	return Mapping_ELF
+}
+
+type Mappings struct {
+	state         protoimpl.MessageState
+	sizeCache     protoimpl.SizeCache
+	unknownFields protoimpl.UnknownFields
+
+	Mappings []*Mapping `protobuf:"bytes,4,rep,name=mappings" json:"mappings,omitempty"`
+}
+
+func (x *Mappings) Reset() {
+	*x = Mappings{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_symbols_map_proto_msgTypes[1]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+func (x *Mappings) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*Mappings) ProtoMessage() {}
+
+func (x *Mappings) ProtoReflect() protoreflect.Message {
+	mi := &file_symbols_map_proto_msgTypes[1]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use Mappings.ProtoReflect.Descriptor instead.
+func (*Mappings) Descriptor() ([]byte, []int) {
+	return file_symbols_map_proto_rawDescGZIP(), []int{1}
+}
+
+func (x *Mappings) GetMappings() []*Mapping {
+	if x != nil {
+		return x.Mappings
+	}
+	return nil
+}
+
+var File_symbols_map_proto protoreflect.FileDescriptor
+
+var file_symbols_map_proto_rawDesc = []byte{
+	0x0a, 0x11, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x5f, 0x6d, 0x61, 0x70, 0x2e, 0x70, 0x72,
+	0x6f, 0x74, 0x6f, 0x12, 0x0b, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x5f, 0x6d, 0x61, 0x70,
+	0x22, 0x8d, 0x01, 0x0a, 0x07, 0x4d, 0x61, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x12, 0x1e, 0x0a, 0x0a,
+	0x69, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x66, 0x69, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09,
+	0x52, 0x0a, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x66, 0x69, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08,
+	0x6c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08,
+	0x6c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x2d, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65,
+	0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x19, 0x2e, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73,
+	0x5f, 0x6d, 0x61, 0x70, 0x2e, 0x4d, 0x61, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x2e, 0x54, 0x79, 0x70,
+	0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x22, 0x17, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12,
+	0x07, 0x0a, 0x03, 0x45, 0x4c, 0x46, 0x10, 0x00, 0x12, 0x06, 0x0a, 0x02, 0x52, 0x38, 0x10, 0x01,
+	0x22, 0x3c, 0x0a, 0x08, 0x4d, 0x61, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x30, 0x0a, 0x08,
+	0x6d, 0x61, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14,
+	0x2e, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x5f, 0x6d, 0x61, 0x70, 0x2e, 0x4d, 0x61, 0x70,
+	0x70, 0x69, 0x6e, 0x67, 0x52, 0x08, 0x6d, 0x61, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x73, 0x42, 0x31,
+	0x5a, 0x2f, 0x61, 0x6e, 0x64, 0x72, 0x6f, 0x69, 0x64, 0x2f, 0x73, 0x6f, 0x6f, 0x6e, 0x67, 0x2f,
+	0x63, 0x6d, 0x64, 0x2f, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x5f, 0x6d, 0x61, 0x70, 0x2f,
+	0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x5f, 0x6d, 0x61, 0x70, 0x5f, 0x70, 0x72, 0x6f, 0x74,
+	0x6f,
+}
+
+var (
+	file_symbols_map_proto_rawDescOnce sync.Once
+	file_symbols_map_proto_rawDescData = file_symbols_map_proto_rawDesc
+)
+
+func file_symbols_map_proto_rawDescGZIP() []byte {
+	file_symbols_map_proto_rawDescOnce.Do(func() {
+		file_symbols_map_proto_rawDescData = protoimpl.X.CompressGZIP(file_symbols_map_proto_rawDescData)
+	})
+	return file_symbols_map_proto_rawDescData
+}
+
+var file_symbols_map_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
+var file_symbols_map_proto_msgTypes = make([]protoimpl.MessageInfo, 2)
+var file_symbols_map_proto_goTypes = []interface{}{
+	(Mapping_Type)(0), // 0: symbols_map.Mapping.Type
+	(*Mapping)(nil),   // 1: symbols_map.Mapping
+	(*Mappings)(nil),  // 2: symbols_map.Mappings
+}
+var file_symbols_map_proto_depIdxs = []int32{
+	0, // 0: symbols_map.Mapping.type:type_name -> symbols_map.Mapping.Type
+	1, // 1: symbols_map.Mappings.mappings:type_name -> symbols_map.Mapping
+	2, // [2:2] is the sub-list for method output_type
+	2, // [2:2] is the sub-list for method input_type
+	2, // [2:2] is the sub-list for extension type_name
+	2, // [2:2] is the sub-list for extension extendee
+	0, // [0:2] is the sub-list for field type_name
+}
+
+func init() { file_symbols_map_proto_init() }
+func file_symbols_map_proto_init() {
+	if File_symbols_map_proto != nil {
+		return
+	}
+	if !protoimpl.UnsafeEnabled {
+		file_symbols_map_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*Mapping); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			default:
+				return nil
+			}
+		}
+		file_symbols_map_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*Mappings); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			default:
+				return nil
+			}
+		}
+	}
+	type x struct{}
+	out := protoimpl.TypeBuilder{
+		File: protoimpl.DescBuilder{
+			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
+			RawDescriptor: file_symbols_map_proto_rawDesc,
+			NumEnums:      1,
+			NumMessages:   2,
+			NumExtensions: 0,
+			NumServices:   0,
+		},
+		GoTypes:           file_symbols_map_proto_goTypes,
+		DependencyIndexes: file_symbols_map_proto_depIdxs,
+		EnumInfos:         file_symbols_map_proto_enumTypes,
+		MessageInfos:      file_symbols_map_proto_msgTypes,
+	}.Build()
+	File_symbols_map_proto = out.File
+	file_symbols_map_proto_rawDesc = nil
+	file_symbols_map_proto_goTypes = nil
+	file_symbols_map_proto_depIdxs = nil
+}
diff --git a/cmd/symbols_map/symbols_map_proto/symbols_map.proto b/cmd/symbols_map/symbols_map_proto/symbols_map.proto
new file mode 100644
index 0000000..693fe3e
--- /dev/null
+++ b/cmd/symbols_map/symbols_map_proto/symbols_map.proto
@@ -0,0 +1,44 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package symbols_map;
+option go_package = "android/soong/cmd/symbols_map/symbols_map_proto";
+
+message Mapping {
+  // identifier is a unique identifier of a location, generally the hash of the file.  For an
+  // elf file it is the elf build ID, for an R8 dictionary it is the hash from the comments in the
+  // top of the file.  It may be empty if no hash could be extracted from the file.
+  optional string identifier = 1;
+
+  // location is the path to the file with the given identifier.  The location should be valid
+  // both on the local disk and in the distributed symbols.zip or proguard_dict.zip files.
+  optional string location = 2;
+
+  // Type is the valid types of a mapping.
+  enum Type {
+    // ELF denotes a mapping from an elf build ID to an unstripped elf file.
+    ELF = 0;
+    // R8 denotes a mapping from an R8 dictionary hash to an R8 dictionary.
+    R8 = 1;
+  }
+
+  // type is the type of the mapping, either ELF or R8.
+  optional Type type = 3;
+}
+
+message Mappings {
+  repeated Mapping mappings = 4;
+}
diff --git a/cmd/symbols_map/symbols_map_test.go b/cmd/symbols_map/symbols_map_test.go
new file mode 100644
index 0000000..754b7ef
--- /dev/null
+++ b/cmd/symbols_map/symbols_map_test.go
@@ -0,0 +1,217 @@
+// Copyright 2022 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"android/soong/cmd/symbols_map/symbols_map_proto"
+
+	"google.golang.org/protobuf/encoding/prototext"
+	"google.golang.org/protobuf/proto"
+)
+
+func Test_mergeProtos(t *testing.T) {
+	type testFile struct { // one input path handed to mergeProtos
+		filename string
+		contents *symbols_map_proto.Mapping
+		missing  bool // when true the file is not written, so its path does not exist
+	}
+
+	tests := []struct { // table of cases; each one runs as its own subtest
+		name               string
+		inputs             []testFile
+		stripPrefix        string
+		writeIfChanged     bool // not set by any case below, so always false
+		ignoreMissingFiles bool
+
+		error  string // substring the returned error must contain; "" means success is expected
+		output *symbols_map_proto.Mappings
+	}{
+		{
+			name:   "empty",
+			output: &symbols_map_proto.Mappings{},
+		},
+		{
+			name: "merge",
+			inputs: []testFile{
+				{
+					filename: "foo",
+					contents: &symbols_map_proto.Mapping{
+						Identifier: proto.String("foo"),
+						Location:   proto.String("symbols/foo"),
+						Type:       symbols_map_proto.Mapping_ELF.Enum(),
+					},
+				},
+				{
+					filename: "bar",
+					contents: &symbols_map_proto.Mapping{
+						Identifier: proto.String("bar"),
+						Location:   proto.String("symbols/bar"),
+						Type:       symbols_map_proto.Mapping_R8.Enum(),
+					},
+				},
+			},
+			output: &symbols_map_proto.Mappings{
+				Mappings: []*symbols_map_proto.Mapping{
+					{
+						Identifier: proto.String("foo"),
+						Location:   proto.String("symbols/foo"),
+						Type:       symbols_map_proto.Mapping_ELF.Enum(),
+					},
+					{
+						Identifier: proto.String("bar"),
+						Location:   proto.String("symbols/bar"),
+						Type:       symbols_map_proto.Mapping_R8.Enum(),
+					},
+				},
+			},
+		},
+		{
+			name: "strip prefix",
+			inputs: []testFile{
+				{
+					filename: "foo",
+					contents: &symbols_map_proto.Mapping{
+						Identifier: proto.String("foo"),
+						Location:   proto.String("symbols/foo"),
+						Type:       symbols_map_proto.Mapping_ELF.Enum(),
+					},
+				},
+				{
+					filename: "bar",
+					contents: &symbols_map_proto.Mapping{
+						Identifier: proto.String("bar"),
+						Location:   proto.String("symbols/bar"),
+						Type:       symbols_map_proto.Mapping_R8.Enum(),
+					},
+				},
+			},
+			stripPrefix: "symbols/",
+			output: &symbols_map_proto.Mappings{
+				Mappings: []*symbols_map_proto.Mapping{
+					{
+						Identifier: proto.String("foo"),
+						Location:   proto.String("foo"),
+						Type:       symbols_map_proto.Mapping_ELF.Enum(),
+					},
+					{
+						Identifier: proto.String("bar"),
+						Location:   proto.String("bar"),
+						Type:       symbols_map_proto.Mapping_R8.Enum(),
+					},
+				},
+			},
+		},
+		{
+			name: "missing",
+			inputs: []testFile{
+				{
+					filename: "foo",
+					contents: &symbols_map_proto.Mapping{
+						Identifier: proto.String("foo"),
+						Location:   proto.String("symbols/foo"),
+						Type:       symbols_map_proto.Mapping_ELF.Enum(),
+					},
+				},
+				{
+					filename: "bar",
+					missing:  true,
+				},
+			},
+			error: "no such file or directory", // NOTE(review): looks like the POSIX ENOENT text; likely platform-specific — confirm
+		},
+		{
+			name: "ignore missing",
+			inputs: []testFile{
+				{
+					filename: "foo",
+					contents: &symbols_map_proto.Mapping{
+						Identifier: proto.String("foo"),
+						Location:   proto.String("symbols/foo"),
+						Type:       symbols_map_proto.Mapping_ELF.Enum(),
+					},
+				},
+				{
+					filename: "bar",
+					missing:  true,
+				},
+			},
+			ignoreMissingFiles: true,
+			output: &symbols_map_proto.Mappings{
+				Mappings: []*symbols_map_proto.Mapping{
+					{
+						Identifier: proto.String("foo"),
+						Location:   proto.String("symbols/foo"),
+						Type:       symbols_map_proto.Mapping_ELF.Enum(),
+					},
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			dir, err := os.MkdirTemp("", "test_mergeProtos")
+			if err != nil {
+				t.Fatalf("failed to create temporary directory: %s", err)
+			}
+			defer os.RemoveAll(dir) // remove the per-subtest temp dir when the subtest returns
+
+			var inputs []string
+			for _, in := range tt.inputs {
+				path := filepath.Join(dir, in.filename)
+				inputs = append(inputs, path) // listed even when the file is deliberately not created
+				if !in.missing {
+					err := writeTextProto(path, in.contents, false) // NOTE(review): false is presumably writeIfChanged — confirm against writeTextProto
+					if err != nil {
+						t.Fatalf("failed to create input file %s: %s", path, err)
+					}
+				}
+			}
+			output := filepath.Join(dir, "out")
+
+			err = mergeProtos(output, inputs, tt.stripPrefix, tt.writeIfChanged, tt.ignoreMissingFiles)
+			if err != nil {
+				if tt.error != "" {
+					if !strings.Contains(err.Error(), tt.error) {
+						t.Fatalf("expected error %q, got %s", tt.error, err.Error())
+					}
+				} else {
+					t.Fatalf("unexpected error %q", err)
+				}
+			} else if tt.error != "" { // mergeProtos returned nil but the case expected an error
+				t.Fatalf("missing error %q", tt.error)
+			} else { // success expected: parse the output textproto and compare it to tt.output
+				data, err := ioutil.ReadFile(output)
+				if err != nil {
+					t.Fatalf("failed to read output file %s: %s", output, err)
+				}
+				var got symbols_map_proto.Mappings
+				err = prototext.Unmarshal(data, &got)
+				if err != nil {
+					t.Fatalf("failed to unmarshal textproto %s: %s", output, err)
+				}
+
+				if !proto.Equal(tt.output, &got) {
+					t.Fatalf("expected output %q, got %q", tt.output.String(), got.String())
+				}
+			}
+		})
+	}
+}