|
""" |
|
A python interface to Adobe Font Metrics Files. |
|
|
|
Although a number of other Python implementations exist, and may be more |
|
complete than this, it was decided not to go with them because they were |
|
either: |
|
|
|
1) copyrighted or used a non-BSD compatible license |
|
2) had too many dependencies and a free standing lib was needed |
|
3) did more than needed and it was easier to write afresh rather than |
|
figure out how to get just what was needed. |
|
|
|
It is pretty easy to use, and has no external dependencies: |
|
|
|
>>> import matplotlib as mpl |
|
>>> from pathlib import Path |
|
>>> afm_path = Path(mpl.get_data_path(), 'fonts', 'afm', 'ptmr8a.afm') |
|
>>> |
|
>>> from matplotlib.afm import AFM |
|
>>> with afm_path.open('rb') as fh: |
|
... afm = AFM(fh) |
|
>>> afm.string_width_height('What the heck?') |
|
(6220.0, 694) |
|
>>> afm.get_fontname() |
|
'Times-Roman' |
|
>>> afm.get_kern_dist('A', 'f') |
|
0 |
|
>>> afm.get_kern_dist('A', 'y') |
|
-92.0 |
|
>>> afm.get_bbox_char('!') |
|
[130, -9, 238, 676] |
|
|
|
As in the Adobe Font Metrics File Format Specification, all dimensions |
|
are given in units of 1/1000 of the scale factor (point size) of the font |
|
being used. |
|
""" |
|
|
|
from collections import namedtuple |
|
import logging |
|
import re |
|
|
|
from ._mathtext_data import uni2type1 |
|
|
|
|
|
_log = logging.getLogger(__name__) |
|
|
|
|
|
def _to_int(x): |
|
|
|
|
|
|
|
|
|
|
|
return int(float(x)) |
|
|
|
|
|
def _to_float(x): |
|
|
|
|
|
|
|
if isinstance(x, bytes): |
|
|
|
|
|
x = x.decode('latin-1') |
|
return float(x.replace(',', '.')) |
|
|
|
|
|
def _to_str(x): |
|
return x.decode('utf8') |
|
|
|
|
|
def _to_list_of_ints(s): |
|
s = s.replace(b',', b' ') |
|
return [_to_int(val) for val in s.split()] |
|
|
|
|
|
def _to_list_of_floats(s): |
|
return [_to_float(val) for val in s.split()] |
|
|
|
|
|
def _to_bool(s): |
|
if s.lower().strip() in (b'false', b'0', b'no'): |
|
return False |
|
else: |
|
return True |
|
|
|
|
|
def _parse_header(fh): |
|
""" |
|
Read the font metrics header (up to the char metrics) and returns |
|
a dictionary mapping *key* to *val*. *val* will be converted to the |
|
appropriate python type as necessary; e.g.: |
|
|
|
* 'False'->False |
|
* '0'->0 |
|
* '-168 -218 1000 898'-> [-168, -218, 1000, 898] |
|
|
|
Dictionary keys are |
|
|
|
StartFontMetrics, FontName, FullName, FamilyName, Weight, |
|
ItalicAngle, IsFixedPitch, FontBBox, UnderlinePosition, |
|
UnderlineThickness, Version, Notice, EncodingScheme, CapHeight, |
|
XHeight, Ascender, Descender, StartCharMetrics |
|
""" |
|
header_converters = { |
|
b'StartFontMetrics': _to_float, |
|
b'FontName': _to_str, |
|
b'FullName': _to_str, |
|
b'FamilyName': _to_str, |
|
b'Weight': _to_str, |
|
b'ItalicAngle': _to_float, |
|
b'IsFixedPitch': _to_bool, |
|
b'FontBBox': _to_list_of_ints, |
|
b'UnderlinePosition': _to_float, |
|
b'UnderlineThickness': _to_float, |
|
b'Version': _to_str, |
|
|
|
|
|
|
|
b'Notice': lambda x: x, |
|
b'EncodingScheme': _to_str, |
|
b'CapHeight': _to_float, |
|
b'Capheight': _to_float, |
|
b'XHeight': _to_float, |
|
b'Ascender': _to_float, |
|
b'Descender': _to_float, |
|
b'StdHW': _to_float, |
|
b'StdVW': _to_float, |
|
b'StartCharMetrics': _to_int, |
|
b'CharacterSet': _to_str, |
|
b'Characters': _to_int, |
|
} |
|
d = {} |
|
first_line = True |
|
for line in fh: |
|
line = line.rstrip() |
|
if line.startswith(b'Comment'): |
|
continue |
|
lst = line.split(b' ', 1) |
|
key = lst[0] |
|
if first_line: |
|
|
|
|
|
|
|
|
|
|
|
if key != b'StartFontMetrics': |
|
raise RuntimeError('Not an AFM file') |
|
first_line = False |
|
if len(lst) == 2: |
|
val = lst[1] |
|
else: |
|
val = b'' |
|
try: |
|
converter = header_converters[key] |
|
except KeyError: |
|
_log.error("Found an unknown keyword in AFM header (was %r)", key) |
|
continue |
|
try: |
|
d[key] = converter(val) |
|
except ValueError: |
|
_log.error('Value error parsing header in AFM: %s, %s', key, val) |
|
continue |
|
if key == b'StartCharMetrics': |
|
break |
|
else: |
|
raise RuntimeError('Bad parse') |
|
return d |
|
|
|
|
|
CharMetrics = namedtuple('CharMetrics', 'width, name, bbox') |
|
CharMetrics.__doc__ = """ |
|
Represents the character metrics of a single character. |
|
|
|
Notes |
|
----- |
|
The fields do currently only describe a subset of character metrics |
|
information defined in the AFM standard. |
|
""" |
|
CharMetrics.width.__doc__ = """The character width (WX).""" |
|
CharMetrics.name.__doc__ = """The character name (N).""" |
|
CharMetrics.bbox.__doc__ = """ |
|
The bbox of the character (B) as a tuple (*llx*, *lly*, *urx*, *ury*).""" |
|
|
|
|
|
def _parse_char_metrics(fh): |
|
""" |
|
Parse the given filehandle for character metrics information and return |
|
the information as dicts. |
|
|
|
It is assumed that the file cursor is on the line behind |
|
'StartCharMetrics'. |
|
|
|
Returns |
|
------- |
|
ascii_d : dict |
|
A mapping "ASCII num of the character" to `.CharMetrics`. |
|
name_d : dict |
|
A mapping "character name" to `.CharMetrics`. |
|
|
|
Notes |
|
----- |
|
This function is incomplete per the standard, but thus far parses |
|
all the sample afm files tried. |
|
""" |
|
required_keys = {'C', 'WX', 'N', 'B'} |
|
|
|
ascii_d = {} |
|
name_d = {} |
|
for line in fh: |
|
|
|
|
|
line = _to_str(line.rstrip()) |
|
if line.startswith('EndCharMetrics'): |
|
return ascii_d, name_d |
|
|
|
vals = dict(s.strip().split(' ', 1) for s in line.split(';') if s) |
|
|
|
if not required_keys.issubset(vals): |
|
raise RuntimeError('Bad char metrics line: %s' % line) |
|
num = _to_int(vals['C']) |
|
wx = _to_float(vals['WX']) |
|
name = vals['N'] |
|
bbox = _to_list_of_floats(vals['B']) |
|
bbox = list(map(int, bbox)) |
|
metrics = CharMetrics(wx, name, bbox) |
|
|
|
|
|
|
|
if name == 'Euro': |
|
num = 128 |
|
elif name == 'minus': |
|
num = ord("\N{MINUS SIGN}") |
|
if num != -1: |
|
ascii_d[num] = metrics |
|
name_d[name] = metrics |
|
raise RuntimeError('Bad parse') |
|
|
|
|
|
def _parse_kern_pairs(fh): |
|
""" |
|
Return a kern pairs dictionary; keys are (*char1*, *char2*) tuples and |
|
values are the kern pair value. For example, a kern pairs line like |
|
``KPX A y -50`` |
|
|
|
will be represented as:: |
|
|
|
d[ ('A', 'y') ] = -50 |
|
|
|
""" |
|
|
|
line = next(fh) |
|
if not line.startswith(b'StartKernPairs'): |
|
raise RuntimeError('Bad start of kern pairs data: %s' % line) |
|
|
|
d = {} |
|
for line in fh: |
|
line = line.rstrip() |
|
if not line: |
|
continue |
|
if line.startswith(b'EndKernPairs'): |
|
next(fh) |
|
return d |
|
vals = line.split() |
|
if len(vals) != 4 or vals[0] != b'KPX': |
|
raise RuntimeError('Bad kern pairs line: %s' % line) |
|
c1, c2, val = _to_str(vals[1]), _to_str(vals[2]), _to_float(vals[3]) |
|
d[(c1, c2)] = val |
|
raise RuntimeError('Bad kern pairs parse') |
|
|
|
|
|
CompositePart = namedtuple('CompositePart', 'name, dx, dy') |
|
CompositePart.__doc__ = """ |
|
Represents the information on a composite element of a composite char.""" |
|
CompositePart.name.__doc__ = """Name of the part, e.g. 'acute'.""" |
|
CompositePart.dx.__doc__ = """x-displacement of the part from the origin.""" |
|
CompositePart.dy.__doc__ = """y-displacement of the part from the origin.""" |
|
|
|
|
|
def _parse_composites(fh): |
|
""" |
|
Parse the given filehandle for composites information return them as a |
|
dict. |
|
|
|
It is assumed that the file cursor is on the line behind 'StartComposites'. |
|
|
|
Returns |
|
------- |
|
dict |
|
A dict mapping composite character names to a parts list. The parts |
|
list is a list of `.CompositePart` entries describing the parts of |
|
the composite. |
|
|
|
Examples |
|
-------- |
|
A composite definition line:: |
|
|
|
CC Aacute 2 ; PCC A 0 0 ; PCC acute 160 170 ; |
|
|
|
will be represented as:: |
|
|
|
composites['Aacute'] = [CompositePart(name='A', dx=0, dy=0), |
|
CompositePart(name='acute', dx=160, dy=170)] |
|
|
|
""" |
|
composites = {} |
|
for line in fh: |
|
line = line.rstrip() |
|
if not line: |
|
continue |
|
if line.startswith(b'EndComposites'): |
|
return composites |
|
vals = line.split(b';') |
|
cc = vals[0].split() |
|
name, _num_parts = cc[1], _to_int(cc[2]) |
|
pccParts = [] |
|
for s in vals[1:-1]: |
|
pcc = s.split() |
|
part = CompositePart(pcc[1], _to_float(pcc[2]), _to_float(pcc[3])) |
|
pccParts.append(part) |
|
composites[name] = pccParts |
|
|
|
raise RuntimeError('Bad composites parse') |
|
|
|
|
|
def _parse_optional(fh): |
|
""" |
|
Parse the optional fields for kern pair data and composites. |
|
|
|
Returns |
|
------- |
|
kern_data : dict |
|
A dict containing kerning information. May be empty. |
|
See `._parse_kern_pairs`. |
|
composites : dict |
|
A dict containing composite information. May be empty. |
|
See `._parse_composites`. |
|
""" |
|
optional = { |
|
b'StartKernData': _parse_kern_pairs, |
|
b'StartComposites': _parse_composites, |
|
} |
|
|
|
d = {b'StartKernData': {}, |
|
b'StartComposites': {}} |
|
for line in fh: |
|
line = line.rstrip() |
|
if not line: |
|
continue |
|
key = line.split()[0] |
|
|
|
if key in optional: |
|
d[key] = optional[key](fh) |
|
|
|
return d[b'StartKernData'], d[b'StartComposites'] |
|
|
|
|
|
class AFM: |
|
|
|
def __init__(self, fh): |
|
"""Parse the AFM file in file object *fh*.""" |
|
self._header = _parse_header(fh) |
|
self._metrics, self._metrics_by_name = _parse_char_metrics(fh) |
|
self._kern, self._composite = _parse_optional(fh) |
|
|
|
def get_bbox_char(self, c, isord=False): |
|
if not isord: |
|
c = ord(c) |
|
return self._metrics[c].bbox |
|
|
|
def string_width_height(self, s): |
|
""" |
|
Return the string width (including kerning) and string height |
|
as a (*w*, *h*) tuple. |
|
""" |
|
if not len(s): |
|
return 0, 0 |
|
total_width = 0 |
|
namelast = None |
|
miny = 1e9 |
|
maxy = 0 |
|
for c in s: |
|
if c == '\n': |
|
continue |
|
wx, name, bbox = self._metrics[ord(c)] |
|
|
|
total_width += wx + self._kern.get((namelast, name), 0) |
|
l, b, w, h = bbox |
|
miny = min(miny, b) |
|
maxy = max(maxy, b + h) |
|
|
|
namelast = name |
|
|
|
return total_width, maxy - miny |
|
|
|
def get_str_bbox_and_descent(self, s): |
|
"""Return the string bounding box and the maximal descent.""" |
|
if not len(s): |
|
return 0, 0, 0, 0, 0 |
|
total_width = 0 |
|
namelast = None |
|
miny = 1e9 |
|
maxy = 0 |
|
left = 0 |
|
if not isinstance(s, str): |
|
s = _to_str(s) |
|
for c in s: |
|
if c == '\n': |
|
continue |
|
name = uni2type1.get(ord(c), f"uni{ord(c):04X}") |
|
try: |
|
wx, _, bbox = self._metrics_by_name[name] |
|
except KeyError: |
|
name = 'question' |
|
wx, _, bbox = self._metrics_by_name[name] |
|
total_width += wx + self._kern.get((namelast, name), 0) |
|
l, b, w, h = bbox |
|
left = min(left, l) |
|
miny = min(miny, b) |
|
maxy = max(maxy, b + h) |
|
|
|
namelast = name |
|
|
|
return left, miny, total_width, maxy - miny, -miny |
|
|
|
def get_str_bbox(self, s): |
|
"""Return the string bounding box.""" |
|
return self.get_str_bbox_and_descent(s)[:4] |
|
|
|
def get_name_char(self, c, isord=False): |
|
"""Get the name of the character, i.e., ';' is 'semicolon'.""" |
|
if not isord: |
|
c = ord(c) |
|
return self._metrics[c].name |
|
|
|
def get_width_char(self, c, isord=False): |
|
""" |
|
Get the width of the character from the character metric WX field. |
|
""" |
|
if not isord: |
|
c = ord(c) |
|
return self._metrics[c].width |
|
|
|
def get_width_from_char_name(self, name): |
|
"""Get the width of the character from a type1 character name.""" |
|
return self._metrics_by_name[name].width |
|
|
|
def get_height_char(self, c, isord=False): |
|
"""Get the bounding box (ink) height of character *c* (space is 0).""" |
|
if not isord: |
|
c = ord(c) |
|
return self._metrics[c].bbox[-1] |
|
|
|
def get_kern_dist(self, c1, c2): |
|
""" |
|
Return the kerning pair distance (possibly 0) for chars *c1* and *c2*. |
|
""" |
|
name1, name2 = self.get_name_char(c1), self.get_name_char(c2) |
|
return self.get_kern_dist_from_name(name1, name2) |
|
|
|
def get_kern_dist_from_name(self, name1, name2): |
|
""" |
|
Return the kerning pair distance (possibly 0) for chars |
|
*name1* and *name2*. |
|
""" |
|
return self._kern.get((name1, name2), 0) |
|
|
|
def get_fontname(self): |
|
"""Return the font name, e.g., 'Times-Roman'.""" |
|
return self._header[b'FontName'] |
|
|
|
@property |
|
def postscript_name(self): |
|
return self.get_fontname() |
|
|
|
def get_fullname(self): |
|
"""Return the font full name, e.g., 'Times-Roman'.""" |
|
name = self._header.get(b'FullName') |
|
if name is None: |
|
name = self._header[b'FontName'] |
|
return name |
|
|
|
def get_familyname(self): |
|
"""Return the font family name, e.g., 'Times'.""" |
|
name = self._header.get(b'FamilyName') |
|
if name is not None: |
|
return name |
|
|
|
|
|
name = self.get_fullname() |
|
extras = (r'(?i)([ -](regular|plain|italic|oblique|bold|semibold|' |
|
r'light|ultralight|extra|condensed))+$') |
|
return re.sub(extras, '', name) |
|
|
|
@property |
|
def family_name(self): |
|
"""The font family name, e.g., 'Times'.""" |
|
return self.get_familyname() |
|
|
|
def get_weight(self): |
|
"""Return the font weight, e.g., 'Bold' or 'Roman'.""" |
|
return self._header[b'Weight'] |
|
|
|
def get_angle(self): |
|
"""Return the fontangle as float.""" |
|
return self._header[b'ItalicAngle'] |
|
|
|
def get_capheight(self): |
|
"""Return the cap height as float.""" |
|
return self._header[b'CapHeight'] |
|
|
|
def get_xheight(self): |
|
"""Return the xheight as float.""" |
|
return self._header[b'XHeight'] |
|
|
|
def get_underline_thickness(self): |
|
"""Return the underline thickness as float.""" |
|
return self._header[b'UnderlineThickness'] |
|
|
|
def get_horizontal_stem_width(self): |
|
""" |
|
Return the standard horizontal stem width as float, or *None* if |
|
not specified in AFM file. |
|
""" |
|
return self._header.get(b'StdHW', None) |
|
|
|
def get_vertical_stem_width(self): |
|
""" |
|
Return the standard vertical stem width as float, or *None* if |
|
not specified in AFM file. |
|
""" |
|
return self._header.get(b'StdVW', None) |
|
|