Skip to content

Use glyph indices for font tracking in vector formats #30335

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: text-overhaul
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/api/next_api_changes/development/30143-ES.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Glyph indices now typed distinctly from character codes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Previously, character codes and glyph indices were both typed as `int`, which meant you
could mix them up by mistake. While the character code can't be made a distinct type
(because it's used with `chr`/`ord`), typing glyph indices as a distinct type means the
two can no longer be freely interchanged.
19 changes: 11 additions & 8 deletions lib/matplotlib/_afm.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@
import inspect
import logging
import re
from typing import BinaryIO, NamedTuple, TypedDict
from typing import BinaryIO, NamedTuple, TypedDict, cast

from ._mathtext_data import uni2type1
from .ft2font import CharacterCodeType, GlyphIndexType


_log = logging.getLogger(__name__)
Expand Down Expand Up @@ -197,7 +198,7 @@ class CharMetrics(NamedTuple):
The bbox of the character (B) as a tuple (*llx*, *lly*, *urx*, *ury*)."""


def _parse_char_metrics(fh: BinaryIO) -> tuple[dict[int, CharMetrics],
def _parse_char_metrics(fh: BinaryIO) -> tuple[dict[CharacterCodeType, CharMetrics],
dict[str, CharMetrics]]:
"""
Parse the given filehandle for character metrics information.
Expand All @@ -218,7 +219,7 @@ def _parse_char_metrics(fh: BinaryIO) -> tuple[dict[int, CharMetrics],
"""
required_keys = {'C', 'WX', 'N', 'B'}

ascii_d: dict[int, CharMetrics] = {}
ascii_d: dict[CharacterCodeType, CharMetrics] = {}
name_d: dict[str, CharMetrics] = {}
for bline in fh:
# We are defensively letting values be utf8. The spec requires
Expand Down Expand Up @@ -409,19 +410,21 @@ def get_str_bbox_and_descent(self, s: str) -> tuple[int, int, float, int, int]:

return left, miny, total_width, maxy - miny, -miny

def get_glyph_name(self, glyph_ind: int) -> str: # For consistency with FT2Font.
def get_glyph_name(self, # For consistency with FT2Font.
glyph_ind: GlyphIndexType) -> str:
"""Get the name of the glyph, i.e., ord(';') is 'semicolon'."""
return self._metrics[glyph_ind].name
return self._metrics[cast(CharacterCodeType, glyph_ind)].name

def get_char_index(self, c: int) -> int: # For consistency with FT2Font.
def get_char_index(self, # For consistency with FT2Font.
c: CharacterCodeType) -> GlyphIndexType:
"""
Return the glyph index corresponding to a character code point.

Note, for AFM fonts, we treat the glyph index the same as the codepoint.
"""
return c
return cast(GlyphIndexType, c)

def get_width_char(self, c: int) -> float:
def get_width_char(self, c: CharacterCodeType) -> float:
"""Get the width of the character code from the character metric WX field."""
return self._metrics[c].width

Expand Down
38 changes: 20 additions & 18 deletions lib/matplotlib/_mathtext.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@

if T.TYPE_CHECKING:
from collections.abc import Iterable
from .ft2font import Glyph
from .ft2font import CharacterCodeType, Glyph, GlyphIndexType


ParserElement.enable_packrat()
_log = logging.getLogger("matplotlib.mathtext")
Expand All @@ -47,7 +48,7 @@
# FONTS


def get_unicode_index(symbol: str) -> int: # Publicly exported.
def get_unicode_index(symbol: str) -> CharacterCodeType: # Publicly exported.
r"""
Return the integer index (from the Unicode table) of *symbol*.

Expand Down Expand Up @@ -85,7 +86,7 @@ class VectorParse(NamedTuple):
width: float
height: float
depth: float
glyphs: list[tuple[FT2Font, float, int, float, float]]
glyphs: list[tuple[FT2Font, float, GlyphIndexType, float, float]]
rects: list[tuple[float, float, float, float]]

VectorParse.__module__ = "matplotlib.mathtext"
Expand Down Expand Up @@ -130,7 +131,7 @@ def __init__(self, box: Box):
def to_vector(self) -> VectorParse:
w, h, d = map(
np.ceil, [self.box.width, self.box.height, self.box.depth])
gs = [(info.font, info.fontsize, info.num, ox, h - oy + info.offset)
gs = [(info.font, info.fontsize, info.glyph_id, ox, h - oy + info.offset)
for ox, oy, info in self.glyphs]
rs = [(x1, h - y2, x2 - x1, y2 - y1)
for x1, y1, x2, y2 in self.rects]
Expand Down Expand Up @@ -212,7 +213,7 @@ class FontInfo(NamedTuple):
fontsize: float
postscript_name: str
metrics: FontMetrics
num: int
glyph_id: GlyphIndexType
glyph: Glyph
offset: float

Expand Down Expand Up @@ -365,15 +366,16 @@ def _get_offset(self, font: FT2Font, glyph: Glyph, fontsize: float,
return 0.

def _get_glyph(self, fontname: str, font_class: str,
sym: str) -> tuple[FT2Font, int, bool]:
sym: str) -> tuple[FT2Font, CharacterCodeType, bool]:
raise NotImplementedError

# The return value of _get_info is cached per-instance.
def _get_info(self, fontname: str, font_class: str, sym: str, fontsize: float,
dpi: float) -> FontInfo:
font, num, slanted = self._get_glyph(fontname, font_class, sym)
font.set_size(fontsize, dpi)
glyph = font.load_char(num, flags=self.load_glyph_flags)
glyph_id = font.get_char_index(num)
glyph = font.load_glyph(glyph_id, flags=self.load_glyph_flags)

xmin, ymin, xmax, ymax = (val / 64 for val in glyph.bbox)
offset = self._get_offset(font, glyph, fontsize, dpi)
Expand All @@ -395,7 +397,7 @@ def _get_info(self, fontname: str, font_class: str, sym: str, fontsize: float,
fontsize=fontsize,
postscript_name=font.postscript_name,
metrics=metrics,
num=num,
glyph_id=glyph_id,
glyph=glyph,
offset=offset
)
Expand Down Expand Up @@ -425,7 +427,8 @@ def get_kern(self, font1: str, fontclass1: str, sym1: str, fontsize1: float,
info1 = self._get_info(font1, fontclass1, sym1, fontsize1, dpi)
info2 = self._get_info(font2, fontclass2, sym2, fontsize2, dpi)
font = info1.font
return font.get_kerning(info1.num, info2.num, Kerning.DEFAULT) / 64
return font.get_kerning(info1.glyph_id, info2.glyph_id,
Kerning.DEFAULT) / 64
return super().get_kern(font1, fontclass1, sym1, fontsize1,
font2, fontclass2, sym2, fontsize2, dpi)

Expand Down Expand Up @@ -459,7 +462,7 @@ def __init__(self, default_font_prop: FontProperties, load_glyph_flags: LoadFlag
_slanted_symbols = set(r"\int \oint".split())

def _get_glyph(self, fontname: str, font_class: str,
sym: str) -> tuple[FT2Font, int, bool]:
sym: str) -> tuple[FT2Font, CharacterCodeType, bool]:
font = None
if fontname in self.fontmap and sym in latex_to_bakoma:
basename, num = latex_to_bakoma[sym]
Expand Down Expand Up @@ -551,7 +554,7 @@ class UnicodeFonts(TruetypeFonts):
# Some glyphs are not present in the `cmr10` font, and must be brought in
# from `cmsy10`. Map the Unicode indices of those glyphs to the indices at
# which they are found in `cmsy10`.
_cmr10_substitutions = {
_cmr10_substitutions: dict[CharacterCodeType, CharacterCodeType] = {
0x00D7: 0x00A3, # Multiplication sign.
0x2212: 0x00A1, # Minus sign.
}
Expand Down Expand Up @@ -594,11 +597,11 @@ def __init__(self, default_font_prop: FontProperties, load_glyph_flags: LoadFlag
_slanted_symbols = set(r"\int \oint".split())

def _map_virtual_font(self, fontname: str, font_class: str,
uniindex: int) -> tuple[str, int]:
uniindex: CharacterCodeType) -> tuple[str, CharacterCodeType]:
return fontname, uniindex

def _get_glyph(self, fontname: str, font_class: str,
sym: str) -> tuple[FT2Font, int, bool]:
sym: str) -> tuple[FT2Font, CharacterCodeType, bool]:
try:
uniindex = get_unicode_index(sym)
found_symbol = True
Expand All @@ -607,8 +610,7 @@ def _get_glyph(self, fontname: str, font_class: str,
found_symbol = False
_log.warning("No TeX to Unicode mapping for %a.", sym)

fontname, uniindex = self._map_virtual_font(
fontname, font_class, uniindex)
fontname, uniindex = self._map_virtual_font(fontname, font_class, uniindex)

new_fontname = fontname

Expand Down Expand Up @@ -693,7 +695,7 @@ def __init__(self, default_font_prop: FontProperties, load_glyph_flags: LoadFlag
self.fontmap[name] = fullpath

def _get_glyph(self, fontname: str, font_class: str,
sym: str) -> tuple[FT2Font, int, bool]:
sym: str) -> tuple[FT2Font, CharacterCodeType, bool]:
# Override prime symbol to use Bakoma.
if sym == r'\prime':
return self.bakoma._get_glyph(fontname, font_class, sym)
Expand Down Expand Up @@ -783,7 +785,7 @@ def __init__(self, default_font_prop: FontProperties, load_glyph_flags: LoadFlag
self.fontmap[name] = fullpath

def _map_virtual_font(self, fontname: str, font_class: str,
uniindex: int) -> tuple[str, int]:
uniindex: CharacterCodeType) -> tuple[str, CharacterCodeType]:
# Handle these "fonts" that are actually embedded in
# other fonts.
font_mapping = stix_virtual_fonts.get(fontname)
Expand Down Expand Up @@ -1170,7 +1172,7 @@ def __init__(self, elements: T.Sequence[Node]):
self.glue_sign = 0 # 0: normal, -1: shrinking, 1: stretching
self.glue_order = 0 # The order of infinity (0 - 3) for the glue

def __repr__(self):
def __repr__(self) -> str:
return "{}<w={:.02f} h={:.02f} d={:.02f} s={:.02f}>[{}]".format(
super().__repr__(),
self.width, self.height,
Expand Down
18 changes: 11 additions & 7 deletions lib/matplotlib/_mathtext_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
"""

from __future__ import annotations
from typing import overload
from typing import TypeAlias, overload

latex_to_bakoma = {
from .ft2font import CharacterCodeType


latex_to_bakoma: dict[str, tuple[str, CharacterCodeType]] = {
'\\__sqrt__' : ('cmex10', 0x70),
'\\bigcap' : ('cmex10', 0x5c),
'\\bigcup' : ('cmex10', 0x5b),
Expand Down Expand Up @@ -241,7 +244,7 @@

# Automatically generated.

type12uni = {
type12uni: dict[str, CharacterCodeType] = {
'aring' : 229,
'quotedblright' : 8221,
'V' : 86,
Expand Down Expand Up @@ -475,7 +478,7 @@
# for key in sd:
# print("{0:24} : {1: <s},".format("'" + key + "'", sd[key]))

tex2uni = {
tex2uni: dict[str, CharacterCodeType] = {
'#' : 0x23,
'$' : 0x24,
'%' : 0x25,
Expand Down Expand Up @@ -1113,8 +1116,9 @@
# Each element is a 4-tuple of the form:
# src_start, src_end, dst_font, dst_start

_EntryTypeIn = tuple[str, str, str, str | int]
_EntryTypeOut = tuple[int, int, str, int]
_EntryTypeIn: TypeAlias = tuple[str, str, str, str | CharacterCodeType]
_EntryTypeOut: TypeAlias = tuple[CharacterCodeType, CharacterCodeType, str,
CharacterCodeType]

_stix_virtual_fonts: dict[str, dict[str, list[_EntryTypeIn]] | list[_EntryTypeIn]] = {
'bb': {
Expand Down Expand Up @@ -1735,7 +1739,7 @@ def _normalize_stix_fontcodes(d):
del _stix_virtual_fonts

# Fix some incorrect glyphs.
stix_glyph_fixes = {
stix_glyph_fixes: dict[CharacterCodeType, CharacterCodeType] = {
# Cap and Cup glyphs are swapped.
0x22d2: 0x22d3,
0x22d3: 0x22d2,
Expand Down
4 changes: 2 additions & 2 deletions lib/matplotlib/_text_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
import dataclasses

from . import _api
from .ft2font import FT2Font, Kerning, LoadFlags
from .ft2font import FT2Font, GlyphIndexType, Kerning, LoadFlags


@dataclasses.dataclass(frozen=True)
class LayoutItem:
ft_object: FT2Font
char: str
glyph_idx: int
glyph_index: GlyphIndexType
x: float
prev_kern: float

Expand Down
27 changes: 14 additions & 13 deletions lib/matplotlib/backends/_backend_pdf_ps.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,27 @@ def _cached_get_afm_from_fname(fname):
return AFM(fh)


def get_glyphs_subset(fontfile, characters):
def get_glyphs_subset(fontfile, glyphs):
"""
Subset a TTF font
Subset a TTF font.
Reads the named fontfile and restricts the font to the characters.
Reads the named fontfile and restricts the font to the glyphs.
Parameters
----------
fontfile : str
Path to the font file
characters : str
Continuous set of characters to include in subset
glyphs : set[int]
Set of glyph IDs to include in subset.
Returns
-------
fontTools.ttLib.ttFont.TTFont
An open font object representing the subset, which needs to
be closed by the caller.
"""

options = subset.Options(glyph_names=True, recommended_glyphs=True)
options = subset.Options(glyph_names=True, recommended_glyphs=True,
retain_gids=True)

# Prevent subsetting extra tables.
options.drop_tables += [
Expand Down Expand Up @@ -71,7 +71,7 @@ def get_glyphs_subset(fontfile, characters):

font = subset.load_font(fontfile, options)
subsetter = subset.Subsetter(options=options)
subsetter.populate(text=characters)
subsetter.populate(gids=glyphs)
subsetter.subset(font)
return font

Expand All @@ -97,10 +97,10 @@ def font_as_file(font):

class CharacterTracker:
"""
Helper for font subsetting by the pdf and ps backends.
Helper for font subsetting by the PDF and PS backends.
Maintains a mapping of font paths to the set of character codepoints that
are being used from that font.
Maintains a mapping of font paths to the set of glyphs that are being used from that
font.
"""

def __init__(self):
Expand All @@ -110,10 +110,11 @@ def track(self, font, s):
"""Record that string *s* is being typeset using font *font*."""
char_to_font = font._get_fontmap(s)
for _c, _f in char_to_font.items():
self.used.setdefault(_f.fname, set()).add(ord(_c))
glyph_index = _f.get_char_index(ord(_c))
self.used.setdefault(_f.fname, set()).add(glyph_index)

def track_glyph(self, font, glyph):
"""Record that codepoint *glyph* is being typeset using font *font*."""
"""Record that glyph index *glyph* is being typeset using font *font*."""
self.used.setdefault(font.fname, set()).add(glyph)


Expand Down
10 changes: 5 additions & 5 deletions lib/matplotlib/backends/backend_cairo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import functools
import gzip
import itertools
import math

import numpy as np
Expand Down Expand Up @@ -248,13 +249,12 @@ def _draw_mathtext(self, gc, x, y, s, prop, angle):
if angle:
ctx.rotate(np.deg2rad(-angle))

for font, fontsize, idx, ox, oy in glyphs:
for (font, fontsize), font_glyphs in itertools.groupby(
glyphs, key=lambda x: (x[0], x[1])):
ctx.new_path()
ctx.move_to(ox, -oy)
ctx.select_font_face(
*_cairo_font_args_from_font_prop(ttfFontProperty(font)))
ctx.select_font_face(*_cairo_font_args_from_font_prop(ttfFontProperty(font)))
ctx.set_font_size(self.points_to_pixels(fontsize))
ctx.show_text(chr(idx))
ctx.show_glyphs([(idx, ox, -oy) for _, _, idx, ox, oy in font_glyphs])

for ox, oy, w, h in rects:
ctx.new_path()
Expand Down
Loading
Loading