Normalize LANG / LC_* environment

This ensures that the current locale supports UTF-8, and that we get a
consistent (but still supported by the system) locale for every locale
category except user-facing messages. This should eliminate any
reproducibility problems around sorting, formatting, etc. for all
built products, while still showing localized error messages where
available.

Bug: 71573630
Test: LANG=es_ES LANGUAGE=es: m   (check env in soong.log)
Change-Id: If33311899eaed8c44573113ee35c5a71cee503a0
diff --git a/ui/build/config.go b/ui/build/config.go
index 0c37724..363121f 100644
--- a/ui/build/config.go
+++ b/ui/build/config.go
@@ -168,6 +168,8 @@
 	}()
 	absJavaHome := absPath(ctx, javaHome)
 
+	ret.configureLocale(ctx)
+
 	newPath := []string{filepath.Join(absJavaHome, "bin")}
 	if path, ok := ret.environ.Get("PATH"); ok && path != "" {
 		newPath = append(newPath, path)
@@ -228,6 +230,52 @@
 	}
 }
 
+// configureLocale normalizes LANG and LC_* in c.environ: every LC_* variable
+// is dropped, LC_MESSAGES is restored from the user's effective message
+// locale, and LANG is forced to a UTF-8 locale reported by `locale -a`.
+// LANGUAGE (read by gettext) is left untouched. Calls ctx.Fatalln when
+// neither C.UTF-8 nor en_US.UTF-8 is available.
+func (c *configImpl) configureLocale(ctx Context) {
+	cmd := Command(ctx, Config{c}, "locale", "locale", "-a")
+	output, err := cmd.Output()
+
+	var locales []string
+	if err == nil {
+		locales = strings.Split(string(output), "\n")
+	} else {
+		// If we're unable to list the locales, let's assume en_US.UTF-8
+		locales = []string{"en_US.UTF-8"}
+		ctx.Verbosef("Failed to list locales (%q), falling back to %q", err, locales)
+	}
+
+	// Resolve the message locale in POSIX precedence order: LC_ALL, then
+	// LC_MESSAGES, then LANG. An empty value counts as unset, so an empty
+	// LC_ALL must not hide a populated LC_MESSAGES or LANG.
+	userLang := ""
+	for _, name := range []string{"LC_ALL", "LC_MESSAGES", "LANG"} {
+		if value, ok := c.environ.Get(name); ok && value != "" {
+			userLang = value
+			break
+		}
+	}
+
+	c.environ.UnsetWithPrefix("LC_")
+	if userLang != "" {
+		c.environ.Set("LC_MESSAGES", userLang)
+	}
+
+	// Prefer C.UTF-8, then en_US.UTF-8. `locale -a` may print the codeset as
+	// "utf8"; both spellings name the same locale.
+	switch {
+	case inList("C.UTF-8", locales) || inList("C.utf8", locales):
+		c.environ.Set("LANG", "C.UTF-8")
+	case inList("en_US.UTF-8", locales) || inList("en_US.utf8", locales):
+		c.environ.Set("LANG", "en_US.UTF-8")
+	default:
+		ctx.Fatalln("System doesn't support either C.UTF-8 or en_US.UTF-8")
+	}
+}
+
 // Lunch configures the environment for a specific product similarly to the
 // `lunch` bash function.
 func (c *configImpl) Lunch(ctx Context, product, variant string) {