Blame - tools/fonts/fontchain_linter.py - android_frameworks_base

2016-03-09 23:08:45 -0800

[diff] [blame]

1

#!/usr/bin/env python

2

3

import collections

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

4

import copy

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

5

import glob

6

from os import path

Seigo Nonaka

2021-04-16 00:11:43 -0700

[diff] [blame]

7

import re

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

8

import sys

9

from xml.etree import ElementTree

10

11

from fontTools import ttLib

12

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

13

EMOJI_VS = 0xFE0F

14

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

15

# Maps a lowercase language code to the four-letter script code that
# lang_to_script() reports for it.
# NOTE(review): 'am' and 'uk' are mapped to 'Latn' here; that looks unusual
# for those languages -- confirm it is intentional before changing it.
LANG_TO_SCRIPT = {
    'as': 'Beng',
    'am': 'Latn',
    'be': 'Cyrl',
    'bg': 'Cyrl',
    'bn': 'Beng',
    'cu': 'Cyrl',
    'cy': 'Latn',
    'da': 'Latn',
    'de': 'Latn',
    'en': 'Latn',
    'es': 'Latn',
    'et': 'Latn',
    'eu': 'Latn',
    'fr': 'Latn',
    'ga': 'Latn',
    'gl': 'Latn',
    'gu': 'Gujr',
    'hi': 'Deva',
    'hr': 'Latn',
    'hu': 'Latn',
    'hy': 'Armn',
    'it': 'Latn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'la': 'Latn',
    'lt': 'Latn',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'mr': 'Deva',
    'nb': 'Latn',
    'nn': 'Latn',
    'or': 'Orya',
    'pa': 'Guru',
    'pt': 'Latn',
    'sl': 'Latn',
    'ta': 'Taml',
    'te': 'Telu',
    'tk': 'Latn',
    'uk': 'Latn',
}

57

58

def lang_to_script(lang_code):
    """Return the script code for a language tag.

    The tag is lowercased and looked up in LANG_TO_SCRIPT. On a miss, the
    last hyphen-separated subtag is inspected: if it is exactly four
    alphabetic characters it is taken to be the script and returned
    title-cased; otherwise it is dropped and the shorter tag is retried.

    Raises:
        AssertionError: the tag is not in the table and no hyphen is left
            to strip.
    """
    remaining = lang_code.lower()
    while True:
        if remaining in LANG_TO_SCRIPT:
            return LANG_TO_SCRIPT[remaining]
        head, hyphen, tail = remaining.rpartition('-')
        assert hyphen, (
            'We do not know what script the "%s" language is written in.'
            % lang_code)
        if len(tail) == 4 and tail.isalpha():
            # This is actually the script
            return tail.title()
        remaining = head

71

72

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

73

def printable(inp):
    """Format a code point, a sequence, or a set of sequences for messages.

    A set is rendered as '{a, b}', a tuple as '<a, b>' (both recursively),
    and anything else is formatted as a single 'U+XXXX' code point.
    """
    kind = type(inp)
    if kind is set:  # set of character sequences
        members = [printable(seq) for seq in inp]
        return '{' + ', '.join(members) + '}'
    if kind is tuple:  # character sequence
        members = [printable(ch) for ch in inp]
        return '<' + ', '.join(members) + '>'
    # single character
    return 'U+%04X' % inp

80

81

82

def open_font(font):
    """Open a font given as a (file name, index) pair.

    The file name is resolved relative to the module-level _fonts_dir. When
    the index is not None it is passed to TTFont as fontNumber.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is None:
        return ttLib.TTFont(font_path)
    return ttLib.TTFont(font_path, fontNumber=index)

89

90

91

def get_best_cmap(font):
    """Return the code point -> glyph name mapping of the best Unicode cmap.

    The format 12 (platform 3, encoding 10) subtable is preferred; the
    format 4 (platform 3, encoding 1) subtable is used otherwise.

    Args:
        font: a (file name, index) pair as accepted by open_font().

    Raises:
        AssertionError: the font has more than one subtable of either kind,
            or has neither of them.
    """
    ttfont = open_font(font)
    all_unicode_cmap = None
    bmp_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (4, 3, 1):
            assert bmp_cmap is None, 'More than one BMP cmap in %s' % (font, )
            bmp_cmap = cmap
        elif specifier == (12, 3, 10):
            assert all_unicode_cmap is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            all_unicode_cmap = cmap

    best = all_unicode_cmap if all_unicode_cmap else bmp_cmap
    # Fail with a readable message instead of an AttributeError on None.
    assert best is not None, (
        'No BMP or UCS-4 Unicode cmap found in %s' % (font, ))
    return best.cmap

106

107

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

108

def get_variation_sequences_cmap(font):
    """Return the font's format 14 (platform 0, encoding 5) cmap subtable.

    Returns None when the font has no such subtable.

    Raises:
        AssertionError: more than one such subtable is present.
    """
    found = None
    for table in open_font(font)['cmap'].tables:
        if (table.format, table.platformID, table.platEncID) != (14, 0, 5):
            continue
        assert found is None, 'More than one VS cmap in %s' % (font, )
        found = table
    return found

def get_emoji_map(font):
    """Build a mapping from code points and sequences to glyph names.

    Keys are single code points (from the best cmap), (base, variation
    selector) pairs (from the variation sequences cmap, if any) and tuples
    of code points (from GSUB ligature lookups). Values are glyph names.
    """
    # Add normal characters
    emoji_map = copy.copy(get_best_cmap(font))
    # Glyph name -> code point, ignoring private-use code points.
    reverse_cmap = {}
    for code, glyph_name in emoji_map.items():
        if not contains_pua(code):
            reverse_cmap[glyph_name] = code

    # Add variation sequences
    vs_cmap = get_variation_sequences_cmap(font)
    if vs_cmap:
        uvs = vs_cmap.uvsDict
        for selector in uvs:
            for base, glyph_name in uvs[selector]:
                # A None glyph means the sequence uses the base's own glyph.
                target = emoji_map[base] if glyph_name is None else glyph_name
                emoji_map[(base, selector)] = target

    # Add GSUB rules
    gsub = open_font(font)['GSUB'].table
    for lookup in gsub.LookupList.Lookup:
        if lookup.LookupType != 4:
            # Other lookups are used in the emoji font for fallback.
            # We ignore them for now.
            continue
        for subtable in lookup.SubTable:
            ligatures = subtable.ligatures
            for first_glyph in ligatures:
                for ligature in ligatures[first_glyph]:
                    glyph_names = [first_glyph] + ligature.Component
                    sequence = tuple(
                        [reverse_cmap[glyph] for glyph in glyph_names])
                    # Make sure no starting subsequence of 'sequence' has been
                    # seen before.
                    for prefix_len in range(2, len(sequence)+1):
                        assert sequence[:prefix_len] not in emoji_map
                    emoji_map[sequence] = ligature.LigGlyph

    return emoji_map

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

158

def assert_font_supports_any_of_chars(font, chars):
    """Exit the program unless the font's best cmap has one of `chars`."""
    supported = get_best_cmap(font)
    if any(code_point in supported for code_point in chars):
        return
    sys.exit('None of characters in %s were found in %s' % (chars, font))

164

165

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

166

def assert_font_supports_all_of_chars(font, chars):
    """Assert that every code point in `chars` is in the font's best cmap."""
    supported = get_best_cmap(font)
    for code_point in chars:
        is_present = code_point in supported
        assert is_present, (
            'U+%04X was not found in %s' % (code_point, font))

171

172

Seigo Nonaka

2017-07-05 16:06:23 -0700

[diff] [blame]

173

def assert_font_supports_none_of_chars(font, chars, fallbackName):
    """Assert that no code point in `chars` is in the font's best cmap.

    Args:
        font: a (file name, index) pair as accepted by get_best_cmap().
        chars: iterable of code points that must be absent.
        fallbackName: name of the fallback chain being checked; when it is
            non-empty it is included in the failure message.

    Raises:
        AssertionError: one of the code points is present in the font.
    """
    best_cmap = get_best_cmap(font)
    for char in chars:
        # The two messages used to be swapped, so the fallback name was only
        # printed when there was none.
        if fallbackName:
            assert char not in best_cmap, (
                'U+%04X was found in %s in fallback %s' % (char, font, fallbackName))
        else:
            assert char not in best_cmap, 'U+%04X was found in %s' % (char, font)

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

181

182

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

183

def assert_font_supports_all_sequences(font, sequences):
    """Assert that the font lists every (base, selector) pair as a default UVS.

    A pair is accepted when the variation sequences cmap has an entry for
    the selector that contains (base, None).

    Raises:
        AssertionError: the font has no variation sequences cmap, or one of
            the sequences is missing from it.
    """
    vs_cmap = get_variation_sequences_cmap(font)
    # get_variation_sequences_cmap() returns None when the table is absent;
    # report that explicitly rather than failing on None.uvsDict.
    assert vs_cmap is not None, (
        'No variation sequences cmap was found in %s' % (font, ))
    vs_dict = vs_cmap.uvsDict
    for base, vs in sorted(sequences):
        assert vs in vs_dict and (base, None) in vs_dict[vs], (
            '<U+%04X, U+%04X> was not found in %s' % (base, vs, font))

188

189

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

190

def check_hyphens(hyphens_dir):
    """Check that fonts for every hyphenated script contain a hyphen.

    Each 'hyph-<lang>.hyb' file in `hyphens_dir` contributes the script of
    <lang>; every font registered for those scripts must then support
    U+002D or U+2010.
    """
    # Find all the scripts that need automatic hyphenation
    scripts = set()
    pattern = path.join(hyphens_dir, '*.hyb')
    for hyb_path in glob.iglob(pattern):
        file_name = path.basename(hyb_path)
        assert file_name.startswith('hyph-'), (
            'Unknown hyphenation file %s' % file_name)
        # The language code sits between the first '-' and the first '.'.
        start = file_name.index('-') + 1
        end = file_name.index('.')
        scripts.add(lang_to_script(file_name[start:end]))

    HYPHENS = {0x002D, 0x2010}
    for script in scripts:
        fonts = _script_to_font_map[script]
        assert fonts, 'No fonts found for the "%s" script' % script
        for font in fonts:
            assert_font_supports_any_of_chars(font, HYPHENS)

206

207

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

208

class FontRecord(object):
    """Plain record describing one font entry; stores its arguments as-is."""

    def __init__(self, name, psName, scripts, variant, weight, style, fallback_for, font):
        # Family-level values.
        self.name, self.scripts, self.variant = name, scripts, variant
        # Font-level values.
        self.psName, self.weight, self.style = psName, weight, style
        self.fallback_for = fallback_for
        # The value identifying the font itself.
        self.font = font

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

220

def parse_fonts_xml(fonts_xml_path):
    """Parse fonts.xml and fill the module-level font tables.

    Rebinds the globals _script_to_font_map (script -> set of
    (file, index)), _fallback_chains (family name -> list of FontRecord) and
    _all_fonts (list of FontRecord). Fonts whose file does not exist under
    _fonts_dir are skipped.
    """
    global _script_to_font_map, _fallback_chains, _all_fonts
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chains = {}
    _all_fonts = []
    families = ElementTree.parse(fonts_xml_path).findall('family')
    # Minikin supports up to 254 but users can place their own font at the first
    # place. Thus, 253 is the maximum allowed number of font families in the
    # default collection.
    assert len(families) < 254, (
        'System font collection can contains up to 253 font families.')

    # First pass: validate family attributes and register every named family
    # so that fallbackFor references can be checked in the second pass.
    for family in families:
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')
        if not name:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)
            continue
        assert variant is None, (
            'No variant expected for LGC font %s.' % name)
        assert langs is None, (
            'No language expected for LGC fonts %s.' % name)
        assert name not in _fallback_chains, 'Duplicated name entry %s' % name
        _fallback_chains[name] = []

    trim_re = re.compile(r"^[ \n\r\t]*(.+)[ \n\r\t]*$")
    # Second pass: build a record for each <font> child.
    for family in families:
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')

        scripts = set()
        if langs:
            scripts = {lang_to_script(lang) for lang in langs.split()}

        for child in family:
            assert child.tag == 'font', (
                'Unknown tag <%s>' % child.tag)
            font_file = trim_re.match(child.text.rstrip()).group(1)

            weight = int(child.get('weight'))
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = child.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            fallback_for = child.get('fallbackFor')

            assert not name or not fallback_for, (
                'name and fallbackFor cannot be present at the same time')
            assert not fallback_for or fallback_for in _fallback_chains, (
                'Unknown fallback name: %s' % fallback_for)

            index = child.get('index')
            if index:
                index = int(index)

            if not path.exists(path.join(_fonts_dir, font_file)):
                continue # Missing font is a valid case. Just ignore the missing font files.

            font = (font_file, index)
            record = FontRecord(
                name,
                child.get('postScriptName'),
                frozenset(scripts),
                variant,
                weight,
                style,
                fallback_for,
                font)

            _all_fonts.append(record)

            if fallback_for:
                _fallback_chains[fallback_for].append(record)
            elif not name or name == 'sans-serif':
                # Unnamed and sans-serif fonts go into every chain.
                for chain in _fallback_chains.values():
                    chain.append(record)
            else:
                _fallback_chains[name].append(record)

            # non-empty names are used for default LGC fonts
            map_scripts = {'Latn', 'Grek', 'Cyrl'} if name else scripts
            for script in map_scripts:
                _script_to_font_map[script].add(font)

316

317

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

318

def check_emoji_coverage(all_emoji, equivalent_emoji):
    """Run the emoji coverage check against the fonts from get_emoji_fonts()."""
    check_emoji_font_coverage(get_emoji_fonts(), all_emoji, equivalent_emoji)

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

321

322

Seigo Nonaka

2021-06-14 17:38:22 -0700

[diff] [blame]

323

def get_emoji_fonts():
    """Return the `font` of every record in _all_fonts tagged with 'Zsye'."""
    emoji_fonts = []
    for record in _all_fonts:
        if 'Zsye' in record.scripts:
            emoji_fonts.append(record.font)
    return emoji_fonts

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

325

Seigo Nonaka

2021-05-04 17:12:35 -0700

[diff] [blame]

326

def is_pua(x):
    """Return True if x lies in one of the three inclusive ranges below."""
    pua_ranges = (
        (0xE000, 0xF8FF),
        (0xF0000, 0xFFFFD),
        (0x100000, 0x10FFFD),
    )
    return any(first <= x <= last for first, last in pua_ranges)

328

329

def contains_pua(sequence):
    """Return True if a code point, or any member of a tuple, is_pua()."""
    if type(sequence) is not tuple:
        return is_pua(sequence)
    flags = [is_pua(code_point) for code_point in sequence]
    return any(flags)

334

Seigo Nonaka

2021-06-14 17:38:22 -0700

[diff] [blame]

335

def get_psname(ttf):
    """Return str() of the first name record with platform 3, encoding 1, ID 6.

    Raises:
        StopIteration: the 'name' table has no such record.
    """
    for record in ttf['name'].names:
        if record.platformID == 3 and record.platEncID == 1 and record.nameID == 6:
            return str(record)
    raise StopIteration

Seigo Nonaka

2021-05-04 17:12:35 -0700

[diff] [blame]

338

339

def check_emoji_compat():
    """Check that fonts named NotoColorEmoji carry EmojiCompat metadata.

    For every font from get_emoji_fonts() whose PostScript name is
    'NotoColorEmoji', the font must have a 'meta' table whose data contains
    an 'Emji' entry.

    Raises:
        AssertionError: the 'meta' table or its 'Emji' entry is missing.
    """
    for emoji_font in get_emoji_fonts():
        ttf = open_font(emoji_font)
        psname = get_psname(ttf)

        # If the font file is NotoColorEmoji, it must be Compat font.
        if psname == 'NotoColorEmoji':
            # Test for the table before indexing: ttf['meta'] raises KeyError
            # when it is absent, which would bypass the message below.
            assert 'meta' in ttf, 'Compat font must have meta table'
            meta = ttf['meta']
            assert 'Emji' in meta.data, 'meta table should have \'Emji\' data.'

349

350

def check_emoji_font_coverage(emoji_fonts, all_emoji, equivalent_emoji):
    """Check that the emoji fonts cover exactly the expected sequences.

    Args:
        emoji_fonts: fonts to inspect, each accepted by get_emoji_map().
        all_emoji: collection of expected code points / sequences.
        equivalent_emoji: dict mapping a sequence to the sequence it must
            share a glyph with.

    Raises:
        AssertionError: listing every problem found: expected sequences
            missing from all fonts, unexpected sequences, equivalent
            sequences with different glyphs, and non-equivalent sequences
            sharing a glyph.
    """
    coverages = [get_emoji_map(emoji_font) for emoji_font in emoji_fonts]

    errors = []

    for sequence in all_emoji:
        if not any(sequence in coverage for coverage in coverages):
            errors.append('%s is not supported in the emoji font.' % printable(sequence))

    # The font needs to support a few extra characters, which is OK
    allowed_extras = {0x0000, 0x000D, 0x0020}
    for coverage in coverages:
        for sequence in coverage:
            if sequence in allowed_extras:
                continue

            if contains_pua(sequence):
                # The font needs to have some PUA for EmojiCompat library.
                continue

            if sequence not in all_emoji:
                errors.append('%s support unexpected in the emoji font.' % printable(sequence))

    for first, second in equivalent_emoji.items():
        for coverage in coverages:
            if first not in coverage or second not in coverage:
                continue  # sequence will be reported missing
            if coverage[first] != coverage[second]:
                errors.append('%s and %s should map to the same glyph.' % (
                    printable(first),
                    printable(second)))

    for coverage in coverages:
        # Group the non-PUA sequences by glyph in one pass instead of
        # rescanning the whole map once per glyph.
        sequences_by_glyph = collections.defaultdict(list)
        for seq, glyph in coverage.items():
            if not contains_pua(seq):
                sequences_by_glyph[glyph].append(seq)

        for glyph, maps_to_glyph in sequences_by_glyph.items():
            if len(maps_to_glyph) <= 1:
                continue
            # There are more than one sequences mapping to the same glyph. We
            # need to make sure they were expected to be equivalent.
            equivalent_seqs = set()
            for seq in maps_to_glyph:
                equivalent_seq = seq
                while equivalent_seq in equivalent_emoji:
                    equivalent_seq = equivalent_emoji[equivalent_seq]
                equivalent_seqs.add(equivalent_seq)
            if len(equivalent_seqs) != 1:
                errors.append('The sequences %s should not result in the same glyph %s' % (
                    printable(equivalent_seqs),
                    glyph))

    assert not errors, '%d emoji font errors:\n%s\n%d emoji font coverage errors' % (len(errors), '\n'.join(errors), len(errors))

402

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

403

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

404

def check_emoji_defaults(default_emoji):
    """Check emoji-default and text-default characters in each fallback chain.

    Fonts checked in a chain must not contain any character of
    `default_emoji`. Characters with the 'Emoji' property that are not in
    `default_emoji` must be found in some font placed before the emoji font
    (characters of age '7.0' are exempt).
    """
    missing_text_chars = _emoji_properties['Emoji'] - default_emoji
    for name, fallback_chain in _fallback_chains.items():
        emoji_font_seen = False
        for record in fallback_chain:
            record_scripts = record.scripts
            if 'Zsye' in record_scripts:
                emoji_font_seen = True
                # No need to check the emoji font
                continue
            # For later fonts, we only check them if they have a script
            # defined, since the defined script may get them to a higher
            # score even if they appear after the emoji font. However,
            # we should skip checking the text symbols font, since
            # symbol fonts should be able to override the emoji display
            # style when 'Zsym' is explicitly specified by the user.
            if emoji_font_seen and (not record_scripts or 'Zsym' in record_scripts):
                continue

            # Check default emoji-style characters
            assert_font_supports_none_of_chars(record.font, default_emoji, name)

            # Mark default text-style characters appearing in fonts above the emoji
            # font as seen
            if emoji_font_seen:
                continue
            missing_text_chars -= set(get_best_cmap(record.font))

    # Noto does not have monochrome glyphs for Unicode 7.0 wingdings and
    # webdings yet.
    missing_text_chars -= _chars_by_age['7.0']
    assert missing_text_chars == set(), (
        'Text style version of some emoji characters are missing: ' +
        repr(missing_text_chars))

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

436

437

Roozbeh Pournader

2016-03-16 18:55:32 -0700

[diff] [blame]

438

# Setting reverse to true returns a dictionary that maps the values to sets of
# characters, useful for some binary properties. Otherwise, we get a
# dictionary that maps characters to the property values, assuming there's only
# one property in the file.
def parse_unicode_datafile(file_path, reverse=False):
    """Parse a semicolon-separated Unicode data file.

    Text after '#' is ignored. The first field of each remaining line is a
    single code point, a 'START..END' range or a space-separated sequence
    (hexadecimal); the second field is the property value.

    Args:
        file_path: path of the data file, read as UTF-8.
        reverse: when true, return property -> set of characters; otherwise
            return character -> property.

    Returns:
        A dict (a defaultdict(set) when `reverse` is true). Sequences are
        represented as tuples of ints, single characters as ints.

    Raises:
        AssertionError: `reverse` is false and a character appears twice.
    """
    output_dict = collections.defaultdict(set) if reverse else {}
    # Unicode data files are UTF-8 and carry non-ASCII text in their
    # comments, so do not depend on the locale's default encoding.
    with open(file_path, encoding='utf-8') as datafile:
        for line in datafile:
            line = line.partition('#')[0].strip()
            if not line:
                continue

            chars, prop = line.split(';')[:2]
            chars = chars.strip()
            prop = prop.strip()

            if ' ' in chars:  # character sequence
                # split() without arguments tolerates repeated spaces.
                additions = [tuple(int(ch, 16) for ch in chars.split())]
            elif '..' in chars:  # character range
                char_start, char_end = chars.split('..')
                additions = range(int(char_start, 16), int(char_end, 16) + 1)
            else:  # single character
                additions = [int(chars, 16)]

            if reverse:
                output_dict[prop].update(additions)
            else:
                for addition in additions:
                    assert addition not in output_dict, (
                        'Duplicate entry %r in %s' % (addition, file_path))
                    output_dict[addition] = prop
    return output_dict

Roozbeh Pournader

2017-04-10 13:52:20 -0700

[diff] [blame]

478

def parse_emoji_variants(file_path):
    """Parse an emoji variation sequences file.

    Each data line has the form '<base> <selector> ; <description> ; ...'
    with hexadecimal code points; text after '#' is ignored.

    Args:
        file_path: path of the data file, read as UTF-8.

    Returns:
        A (text_set, emoji_set) pair of sets of (base, selector) tuples,
        holding the lines described as 'text style' and 'emoji style'
        respectively. Lines with any other description are ignored.
    """
    emoji_set = set()
    text_set = set()
    # The data file is UTF-8 with non-ASCII comments; do not depend on the
    # locale's default encoding.
    with open(file_path, encoding='utf-8') as datafile:
        for line in datafile:
            line = line.partition('#')[0].strip()
            if not line:
                continue
            sequence, description, _ = line.split(';')
            # split() without arguments tolerates repeated spaces.
            sequence = sequence.split()
            base = int(sequence[0], 16)
            vs = int(sequence[1], 16)
            description = description.strip()
            if description == 'text style':
                text_set.add((base, vs))
            elif description == 'emoji style':
                emoji_set.add((base, vs))
    return text_set, emoji_set

498

499

Roozbeh Pournader

2016-03-16 18:55:32 -0700

[diff] [blame]

500

def parse_ucd(ucd_path):
    """Load the Unicode/emoji data files under `ucd_path` into globals.

    Rebinds _emoji_properties, _chars_by_age, _text_variation_sequences,
    _emoji_variation_sequences, _emoji_sequences and _emoji_zwj_sequences.
    Files in the 'additions' subdirectory extend the main data, and
    'additions/emoji-exclusions.txt' removes entries from the sequence data.
    """
    global _emoji_properties, _chars_by_age
    global _text_variation_sequences, _emoji_variation_sequences
    global _emoji_sequences, _emoji_zwj_sequences

    def ucd_file(*parts):
        # Path of a data file below ucd_path.
        return path.join(ucd_path, *parts)

    _emoji_properties = parse_unicode_datafile(
        ucd_file('emoji-data.txt'), reverse=True)
    emoji_properties_additions = parse_unicode_datafile(
        ucd_file('additions', 'emoji-data.txt'), reverse=True)
    for prop, extra_chars in emoji_properties_additions.items():
        _emoji_properties[prop].update(extra_chars)

    _chars_by_age = parse_unicode_datafile(
        ucd_file('DerivedAge.txt'), reverse=True)
    _text_variation_sequences, _emoji_variation_sequences = (
        parse_emoji_variants(ucd_file('emoji-variation-sequences.txt')))

    _emoji_sequences = parse_unicode_datafile(
        ucd_file('emoji-sequences.txt'))
    _emoji_sequences.update(parse_unicode_datafile(
        ucd_file('additions', 'emoji-sequences.txt')))

    _emoji_zwj_sequences = parse_unicode_datafile(
        ucd_file('emoji-zwj-sequences.txt'))
    _emoji_zwj_sequences.update(parse_unicode_datafile(
        ucd_file('additions', 'emoji-zwj-sequences.txt')))

    exclusions = parse_unicode_datafile(
        ucd_file('additions', 'emoji-exclusions.txt'))
    _emoji_sequences = remove_emoji_exclude(_emoji_sequences, exclusions)
    _emoji_zwj_sequences = remove_emoji_exclude(_emoji_zwj_sequences, exclusions)
    _emoji_variation_sequences = remove_emoji_variation_exclude(
        _emoji_variation_sequences, exclusions)

    # Unicode 12.0 adds Basic_Emoji in emoji-sequences.txt. We ignore them here since we are already
    # checking the emoji presentations with emoji-variation-sequences.txt.
    # Please refer to http://unicode.org/reports/tr51/#def_basic_emoji_set .
    without_basic = {}
    for sequence, category in _emoji_sequences.items():
        if not category == 'Basic_Emoji':
            without_basic[sequence] = category
    _emoji_sequences = without_basic

Qingqing Deng

5e98771

2019-03-25 16:53:34 -0700

[diff] [blame]

533

Siyamed Sinir

d97df5a

2018-04-12 13:11:42 -0700

[diff] [blame]

534

535

def remove_emoji_variation_exclude(source, items):
    """Return the set `source` without the keys of the mapping `items`."""
    excluded = items.keys()
    return source.difference(excluded)

537

538

def remove_emoji_exclude(source, items):
    """Return a new dict with the entries of `source` whose key is not in `items`."""
    kept = {}
    for key, value in source.items():
        if key not in items:
            kept[key] = value
    return kept

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

540

541

def flag_sequence(territory_code):
    """Map each letter of `territory_code` to 0x1F1E6 plus its offset from 'A'."""
    offset = 0x1F1E6 - ord('A')
    return tuple(ord(letter) + offset for letter in territory_code)

543

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

544

# Maps a flag sequence to the flag sequence listed as its equivalent.
EQUIVALENT_FLAGS = {
    flag_sequence(territory): flag_sequence(equivalent)
    for territory, equivalent in (
        ('BV', 'NO'),
        ('CP', 'FR'),
        ('HM', 'AU'),
        ('SJ', 'NO'),
        ('UM', 'US'),
    )
}

551

552

COMBINING_KEYCAP = 0x20E3

553

554

# Maps a single code point to the flag or keycap sequence associated with it.
# NOTE(review): presumably these keys are Android's legacy emoji code points,
# as the name suggests -- confirm.
LEGACY_ANDROID_EMOJI = {
    # Flags
    0xFE4E5: flag_sequence('JP'),
    0xFE4E6: flag_sequence('US'),
    0xFE4E7: flag_sequence('FR'),
    0xFE4E8: flag_sequence('DE'),
    0xFE4E9: flag_sequence('IT'),
    0xFE4EA: flag_sequence('GB'),
    0xFE4EB: flag_sequence('ES'),
    0xFE4EC: flag_sequence('RU'),
    0xFE4ED: flag_sequence('CN'),
    0xFE4EE: flag_sequence('KR'),
    # Keycaps
    0xFE82C: (ord('#'), COMBINING_KEYCAP),
    0xFE82E: (ord('1'), COMBINING_KEYCAP),
    0xFE82F: (ord('2'), COMBINING_KEYCAP),
    0xFE830: (ord('3'), COMBINING_KEYCAP),
    0xFE831: (ord('4'), COMBINING_KEYCAP),
    0xFE832: (ord('5'), COMBINING_KEYCAP),
    0xFE833: (ord('6'), COMBINING_KEYCAP),
    0xFE834: (ord('7'), COMBINING_KEYCAP),
    0xFE835: (ord('8'), COMBINING_KEYCAP),
    0xFE836: (ord('9'), COMBINING_KEYCAP),
    0xFE837: (ord('0'), COMBINING_KEYCAP),
}

577

Siyamed Sinir

77a1b14

2018-07-12 12:02:18 -0700

[diff] [blame]

578

# Defines the emoji that should have the same glyph.
# For example, we previously had a gender-based Kiss (0x1F48F), which had the
# same glyph as Kiss: Woman, Man
# (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468).
# In that case a valid row would be:
# (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468): 0x1F48F,
ZWJ_IDENTICALS = {}

585

Seigo Nonaka

c180863

2018-05-14 13:39:40 -0700

[diff] [blame]

586

# Pairs of two-code-point flag sequences, one pair per entry.
SAME_FLAG_MAPPINGS = [
    # Diego Garcia and British Indian Ocean Territory
    (
        (0x1F1EE, 0x1F1F4),
        (0x1F1E9, 0x1F1EC),
    ),
    # St. Martin and France
    (
        (0x1F1F2, 0x1F1EB),
        (0x1F1EB, 0x1F1F7),
    ),
    # Spain and Ceuta & Melilla
    (
        (0x1F1EA, 0x1F1F8),
        (0x1F1EA, 0x1F1E6),
    ),
]

594

Roozbeh Pournader

2017-04-10 13:52:20 -0700

[diff] [blame]

595

ZWJ = 0x200D

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

596

597

def is_fitzpatrick_modifier(cp):
    """Return True if cp is in the inclusive range 0x1F3FB..0x1F3FF."""
    first_modifier, last_modifier = 0x1F3FB, 0x1F3FF
    return cp >= first_modifier and cp <= last_modifier

599

600

601

def reverse_emoji(seq):
    """Return `seq` reversed, as a tuple, with modifiers kept in place.

    After the reversal a Fitzpatrick modifier would precede the code point
    it followed in `seq`; each such pair is swapped back.
    """
    rev = list(seq)[::-1]
    # if there are fitzpatrick modifiers in the sequence, keep them after
    # the emoji they modify
    for i in range(1, len(rev)):
        prev = i - 1
        if is_fitzpatrick_modifier(rev[prev]):
            rev[prev], rev[i] = rev[i], rev[prev]
    return tuple(rev)

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

609

610

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

611

def compute_expected_emoji():
    """Compute the emoji that the fonts are expected to support.

    Reads the module-level tables `_emoji_variation_sequences`,
    `_emoji_zwj_sequences`, `_emoji_sequences` and `_emoji_properties`
    (presumably filled in by parse_ucd() -- verify against caller), plus the
    constants LEGACY_ANDROID_EMOJI, EQUIVALENT_FLAGS, ZWJ_IDENTICALS and
    SAME_FLAG_MAPPINGS.

    Side effect: adds the bare tag-flag sequence (0x1F3F4, 0xE007F) to
    `_emoji_sequences` as an 'Emoji_Tag_Sequence'.

    Returns:
        A tuple `(all_emoji, default_emoji, equivalent_emoji)` where
        `all_emoji` is the union of the 'Emoji' property set, every known
        sequence (with EMOJI_VS stripped) and its RTL reversal, every code
        point used in a sequence, and the legacy Android emoji;
        `default_emoji` is the union of the 'Emoji_Presentation' property
        set, the sequences, and the legacy Android emoji;
        `equivalent_emoji` is a dict mapping an emoji to the emoji it is
        treated as equivalent to.
    """
    equivalent_emoji = {}
    sequence_pieces = set()
    all_sequences = set(_emoji_variation_sequences)

    # Work on a copy so that extra ZWJ sequences can be added here without
    # touching the parsed table.
    adjusted_emoji_zwj_sequences = dict(_emoji_zwj_sequences)

    # Add empty flag tag sequence that is supported as fallback
    _emoji_sequences[(0x1F3F4, 0xE007F)] = 'Emoji_Tag_Sequence'

    for raw_sequence in _emoji_sequences:
        sequence = tuple(ch for ch in raw_sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # The sequence type is looked up with the VS-stripped key.
        if _emoji_sequences.get(sequence) == 'Emoji_Tag_Sequence':
            # Add the reverse of every emoji tag sequence; the reversed forms
            # are added to the fonts as a workaround to get the sequences
            # to work in RTL text.
            # TODO: test if these are actually needed by Minikin/HarfBuzz.
            reversed_seq = reverse_emoji(sequence)
            all_sequences.add(reversed_seq)
            equivalent_emoji[reversed_seq] = sequence

    for raw_sequence in adjusted_emoji_zwj_sequences:
        sequence = tuple(ch for ch in raw_sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # Add reverse of all emoji ZWJ sequences, which are added to the fonts
        # as a workaround to get the sequences to work in RTL text.
        reversed_seq = reverse_emoji(sequence)
        all_sequences.add(reversed_seq)
        equivalent_emoji[reversed_seq] = sequence

    for first, second in SAME_FLAG_MAPPINGS:
        equivalent_emoji[first] = second

    # Add all tag characters used in flags
    sequence_pieces.update(range(0xE0030, 0xE0039 + 1))
    sequence_pieces.update(range(0xE0061, 0xE007A + 1))

    legacy_emoji = set(LEGACY_ANDROID_EMOJI)
    all_emoji = (
        _emoji_properties['Emoji'] |
        all_sequences |
        sequence_pieces |
        legacy_emoji)
    default_emoji = (
        _emoji_properties['Emoji_Presentation'] |
        all_sequences |
        legacy_emoji)

    # Later updates deliberately override earlier entries for the same key.
    equivalent_emoji.update(EQUIVALENT_FLAGS)
    equivalent_emoji.update(LEGACY_ANDROID_EMOJI)
    equivalent_emoji.update(ZWJ_IDENTICALS)

    # A variation sequence is equivalent to its base character.
    for seq in _emoji_variation_sequences:
        equivalent_emoji[seq] = seq[0]

    return all_emoji, default_emoji, equivalent_emoji

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

671

672

Seigo Nonaka

2017-07-05 16:06:23 -0700

[diff] [blame]

673

def check_compact_only_fallback():
    """Assert that every 'compact' fallback font has an 'elegant' counterpart.

    For each chain in `_fallback_chains`, a record whose variant is
    'compact' must be accompanied, in the same chain, by at least one record
    with equal `scripts` and variant 'elegant'.

    Raises:
        AssertionError: if a compact record has no such elegant record.
    """
    for fallback_chain in _fallback_chains.values():
        for record in fallback_chain:
            if record.variant != 'compact':
                continue
            has_elegant = any(
                x.scripts == record.scripts and x.variant == 'elegant'
                for x in fallback_chain)
            # The message is a plain string (not a 1-tuple) so that the
            # failure output reads cleanly.
            assert has_elegant, (
                '%s must be in elegant of %s as fallback of "%s" too' % (
                    record.font, record.scripts, record.fallback_for))

682

683

Roozbeh Pournader

2016-07-27 13:08:37 -0700

[diff] [blame]

684

def check_vertical_metrics():
    """Assert the vertical metrics of the core font families.

    For every record in `_all_fonts`:
      * families 'sans-serif' and 'sans-serif-condensed' must have
        head.yMax == 2163 and head.yMin == -555;
      * those two families plus 'serif' and 'monospace' must have
        hhea.ascent == 1900 and hhea.descent == -500.

    Raises:
        AssertionError: if a font deviates from the expected values.
    """
    head_families = ('sans-serif', 'sans-serif-condensed')
    hhea_families = head_families + ('serif', 'monospace')

    for record in _all_fonts:
        if record.name in head_families:
            head = open_font(record.font)['head']
            bounds_ok = head.yMax == 2163 and head.yMin == -555
            assert bounds_ok, (
                'yMax and yMin of %s do not match expected values.' % (
                    record.font,))

        if record.name in hhea_families:
            hhea = open_font(record.font)['hhea']
            metrics_ok = hhea.ascent == 1900 and hhea.descent == -500
            assert metrics_ok, (
                'ascent and descent of %s do not match expected '
                'values.' % (record.font,))

699

700

701

def check_cjk_punctuation():
    """Assert that no font ahead of the first CJK font covers CJK punctuation.

    Walks each chain in `_fallback_chains` in order and, until a record
    whose scripts include Hans, Hant, Jpan or Kore is reached, checks via
    assert_font_supports_none_of_chars() that the font supports none of
    U+3000..U+301F.
    """
    cjk_scripts = {'Hans', 'Hant', 'Jpan', 'Kore'}
    cjk_punctuation = range(0x3000, 0x301F + 1)

    for chain_name, chain in _fallback_chains.items():
        for record in chain:
            reached_cjk_font = record.scripts.intersection(cjk_scripts)
            if not reached_cjk_font:
                assert_font_supports_none_of_chars(
                    record.font, cjk_punctuation, chain_name)
                continue
            # CJK font seen. Stop checking the rest of the fonts.
            break

Roozbeh Pournader

2016-07-27 13:08:37 -0700

[diff] [blame]

710

Seigo Nonaka

2021-04-16 00:11:43 -0700

[diff] [blame]

711

def getPostScriptName(font):
    """Return the PostScript name recorded in a font file's `name` table.

    Args:
        font: a `(file name, index)` pair. The file name is resolved
            relative to `_fonts_dir`; `index` is None for a standalone font
            and not None for a member of a font collection.

    Returns:
        The string form of the first name record with nameID 6 (PostScript
        name), platformID 3, platEncID 1 and langID 0x0409, or None if the
        font has no such record.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is not None:
        # Use the first font file in the collection for resolving post script name.
        ttf = ttLib.TTFont(font_path, fontNumber=0)
    else:
        ttf = ttLib.TTFont(font_path)

    # Close the font explicitly: this function opens a new TTFont on every
    # call, so relying on garbage collection would leave file handles open.
    try:
        for name in ttf['name'].names:
            if (name.nameID == 6 and name.platformID == 3
                    and name.platEncID == 1 and name.langID == 0x0409):
                return str(name)
        return None
    finally:
        ttf.close()

725

726

def check_canonical_name():
    """Assert that each font's PostScript name matches its declaration.

    For every record in `_all_fonts`, the name returned by
    getPostScriptName() must equal the record's `psName` when that is set;
    otherwise it must equal the font file name with its last four
    characters removed.

    Raises:
        AssertionError: on the first font whose names disagree.
    """
    for record in _all_fonts:
        file_name, _ = record.font
        actual_name = getPostScriptName(record.font)

        if not record.psName:
            # If fonts element doesn't have postScriptName attribute, the
            # file name should match with the PostScript name in the name
            # table.
            assert actual_name == file_name[:-4], (
                'file name %s should match with %s' % (
                    file_name, actual_name))
            continue

        # If fonts element has postScriptName attribute, it should match
        # with the PostScript name in the name table.
        assert actual_name == record.psName, (
            'postScriptName attribute %s should match with %s' % (
                record.psName, actual_name))

741

Roozbeh Pournader

2016-07-27 13:08:37 -0700

[diff] [blame]

742

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

743

def main():

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

744

global _fonts_dir

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

745

target_out = sys.argv[1]

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

746

_fonts_dir = path.join(target_out, 'fonts')

747

748

fonts_xml_path = path.join(target_out, 'etc', 'fonts.xml')

749

parse_fonts_xml(fonts_xml_path)

750

Seigo Nonaka

2017-07-05 16:06:23 -0700

[diff] [blame]

751

check_compact_only_fallback()

752

Roozbeh Pournader

2016-07-27 13:08:37 -0700

[diff] [blame]

753

check_vertical_metrics()

754

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

755

hyphens_dir = path.join(target_out, 'usr', 'hyphen-data')

756

check_hyphens(hyphens_dir)

757

Roozbeh Pournader

2017-05-18 18:38:36 -0700

[diff] [blame]

758

check_cjk_punctuation()

759

Seigo Nonaka

2021-04-16 00:11:43 -0700

[diff] [blame]

760

check_canonical_name()

761

Roozbeh Pournader

27ec3ac

2016-03-31 13:05:32 -0700

[diff] [blame]

762

check_emoji = sys.argv[2]

763

if check_emoji == 'true':

764

ucd_path = sys.argv[3]

765

parse_ucd(ucd_path)

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

766

all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()

Seigo Nonaka

2021-05-04 17:12:35 -0700

[diff] [blame]

767

check_emoji_compat()

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

768

check_emoji_coverage(all_emoji, equivalent_emoji)

769

check_emoji_defaults(default_emoji)

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

770

Roozbeh Pournader