import logging
import os
from collections import defaultdict, namedtuple
from functools import reduce
from itertools import chain
from math import log2
from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple

from fontTools.config import OPTIONS
from fontTools.misc.intTools import bit_count, bit_indices
from fontTools.ttLib import TTFont
from fontTools.ttLib.tables import otBase, otTables

log = logging.getLogger(__name__)

COMPRESSION_LEVEL = OPTIONS[f"{__name__}:COMPRESSION_LEVEL"]

# Legacy environment variable, deprecated in favour of the COMPRESSION_LEVEL config
# option (see the deprecation warning emitted in _compression_level_from_env below).
GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE"
GPOS_COMPACT_MODE_DEFAULT = str(COMPRESSION_LEVEL.default)


def _compression_level_from_env() -> int:
    env_level = GPOS_COMPACT_MODE_DEFAULT
    if GPOS_COMPACT_MODE_ENV_KEY in os.environ:
        import warnings

        warnings.warn(
            f"'{GPOS_COMPACT_MODE_ENV_KEY}' environment variable is deprecated. "
            "Please set the 'fontTools.otlLib.optimize.gpos:COMPRESSION_LEVEL' option "
            "in TTFont.cfg.",
            DeprecationWarning,
        )

        env_level = os.environ[GPOS_COMPACT_MODE_ENV_KEY]
    if len(env_level) == 1 and env_level in "0123456789":
        return int(env_level)
    raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={env_level}")


def compact(font: TTFont, level: int) -> TTFont:
    """Compact the class-based (format 2) PairPos subtables of the font's GPOS table
    in place, regrouping pairs into new subtables whenever that reduces the estimated
    encoded size. Both plain pair-positioning lookups (type 2) and Extension lookups
    (type 9) wrapping type 2 subtables are handled.

    ``level`` controls how aggressively subtables may be split: higher values allow
    more new subtables and hence more size reduction, with 9 splitting as much as is
    beneficial.
    """
    gpos = font["GPOS"]
    for lookup in gpos.table.LookupList.Lookup:
        if lookup.LookupType == 2:
            compact_lookup(font, level, lookup)
        elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2:
            compact_ext_lookup(font, level, lookup)
    return font
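
# Example (sketch): applying the compaction directly to a font in memory. The file
# names are hypothetical; ``compact`` is the function defined above.
#
#     from fontTools.ttLib import TTFont
#     from fontTools.otlLib.optimize.gpos import compact
#
#     font = TTFont("MyFont.ttf")  # hypothetical input path
#     compact(font, 5)  # rewrites the GPOS PairPos lookups in place
#     font.save("MyFont-compacted.ttf")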


def compact_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
    new_subtables = compact_pair_pos(font, level, lookup.SubTable)
    lookup.SubTable = new_subtables
    lookup.SubTableCount = len(new_subtables)


def compact_ext_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
    # Unwrap the Extension subtables, compact the wrapped PairPos subtables, then
    # re-wrap the results in fresh ExtensionPos subtables.
    new_subtables = compact_pair_pos(
        font, level, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable]
    )
    new_ext_subtables = []
    for subtable in new_subtables:
        ext_subtable = otTables.ExtensionPos()
        ext_subtable.Format = 1
        ext_subtable.ExtSubTable = subtable
        new_ext_subtables.append(ext_subtable)
    lookup.SubTable = new_ext_subtables
    lookup.SubTableCount = len(new_ext_subtables)


def compact_pair_pos(
    font: TTFont, level: int, subtables: Sequence[otTables.PairPos]
) -> Sequence[otTables.PairPos]:
    new_subtables = []
    for subtable in subtables:
        if subtable.Format == 1:
            # Leave format 1 (per-glyph pair) subtables untouched; only class-based
            # (format 2) subtables are re-clustered.
            new_subtables.append(subtable)
        elif subtable.Format == 2:
            new_subtables.extend(compact_class_pairs(font, level, subtable))
    return new_subtables


def compact_class_pairs(
    font: TTFont, level: int, subtable: otTables.PairPos
) -> List[otTables.PairPos]:
    from fontTools.otlLib.builder import buildPairPosClassesSubtable

    subtables = []
    # Rebuild the class1/class2 definitions as {class index: [glyph names]} so that
    # each (class1, class2) pair can be keyed by its glyph content.
    classes1: DefaultDict[int, List[str]] = defaultdict(list)
    for g in subtable.Coverage.glyphs:
        classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g)
    classes2: DefaultDict[int, List[str]] = defaultdict(list)
    for g, i in subtable.ClassDef2.classDefs.items():
        classes2[i].append(g)
    # Collect every (class1, class2) pair together with its value records, dropping
    # pairs whose value records are effectively empty.
    all_pairs = {}
    for i, class1 in enumerate(subtable.Class1Record):
        for j, class2 in enumerate(class1.Class2Record):
            if is_really_zero(class2):
                continue
            all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = (
                getattr(class2, "Value1", None),
                getattr(class2, "Value2", None),
            )
    grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost(font, all_pairs, level)
    for pairs in grouped_pairs:
        subtables.append(buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap()))
    return subtables


def is_really_zero(class2: otTables.Class2Record) -> bool:
    v1 = getattr(class2, "Value1", None)
    v2 = getattr(class2, "Value2", None)
    return (v1 is None or v1.getEffectiveFormat() == 0) and (
        v2 is None or v2.getEffectiveFormat() == 0
    )


# Mapping from a (class1 glyphs, class2 glyphs) pair, each given as a sorted tuple of
# glyph names, to its pair of value records (Value1, Value2); either value may be None.
Pairs = Dict[
    Tuple[Tuple[str, ...], Tuple[str, ...]],
    Tuple[otBase.ValueRecord, otBase.ValueRecord],
]
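
# Example (sketch, hypothetical glyph names and values) of the shape of a ``Pairs``
# mapping: the class containing "A" and "Agrave" followed by the class containing "V"
# carries a value record for the first glyph and none for the second.
#
#     pairs: Pairs = {
#         (("A", "Agrave"), ("V",)): (value_record_for_first_glyph, None),
#     }
#
# where ``value_record_for_first_glyph`` stands for an ``otBase.ValueRecord``, e.g.
# one whose ``XAdvance`` attribute is -40.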


def _getClassRanges(glyphIDs: Iterable[int]):
    # Group the given glyph IDs into ranges of consecutive IDs. Returns the list of
    # [first, last] ranges plus the smallest and largest glyph ID seen.
    glyphIDs = sorted(glyphIDs)
    last = glyphIDs[0]
    ranges = [[last]]
    for glyphID in glyphIDs[1:]:
        if glyphID != last + 1:
            ranges[-1].append(last)
            ranges.append([glyphID])
        last = glyphID
    ranges[-1].append(last)
    return ranges, glyphIDs[0], glyphIDs[-1]
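
# For example (worked through the code above):
#
#     _getClassRanges([1, 2, 3, 7, 8]) == ([[1, 3], [7, 8]], 1, 8)
#
# i.e. two ranges of consecutive glyph IDs, with minimum ID 1 and maximum ID 8.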


def _classDef_bytes(
    class_data: List[Tuple[List[Tuple[int, int]], int, int]],
    class_ids: List[int],
    coverage=False,
):
    # Estimate the encoded size of a ClassDef table covering the given classes,
    # using whichever of format 1 or format 2 is smaller.
    if not class_ids:
        return 0
    first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]]
    range_count = len(first_ranges)
    for i in class_ids[1:]:
        data = class_data[i]
        range_count += len(data[0])
        min_glyph_id = min(min_glyph_id, data[1])
        max_glyph_id = max(max_glyph_id, data[2])
    glyphCount = max_glyph_id - min_glyph_id + 1
    # ClassDef format 1: 2 bytes each for classFormat, startGlyphID and glyphCount,
    # plus 2 bytes per glyph in the contiguous span.
    format1_bytes = 6 + glyphCount * 2
    # ClassDef format 2: 2 bytes each for classFormat and classRangeCount, plus 6
    # bytes per ClassRangeRecord (startGlyphID, endGlyphID, class).
    format2_bytes = 4 + range_count * 6
    return min(format1_bytes, format2_bytes)
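
# For example (worked through the code above): two classes whose glyphs span the ID
# ranges [10-12] and [40-41, 50-50] give range_count = 3, min_glyph_id = 10 and
# max_glyph_id = 50, so glyphCount = 41; format 1 would need 6 + 41 * 2 = 88 bytes
# while format 2 needs 4 + 3 * 6 = 22 bytes, and 22 is returned.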


ClusteringContext = namedtuple(
    "ClusteringContext",
    [
        # One int bitmask per class1 row: bit i is set when the pair
        # (class1, all_class2[i]) has a value.
        "lines",
        "all_class1",
        "all_class1_data",
        "all_class2_data",
        "valueFormat1_bytes",
        "valueFormat2_bytes",
    ],
)


class Cluster:
    """A candidate subtable: a set of class1 rows (identified by a bitmask over the
    indices of ``ctx.all_class1``) grouped together, with a lazily computed estimate
    of the size in bytes of the PairPos format 2 subtable that would encode them.
    """

    # Lazily computed attributes, cached manually through __slots__ + properties.
    __slots__ = "ctx", "indices_bitmask", "_indices", "_column_indices", "_cost"

    def __init__(self, ctx: ClusteringContext, indices_bitmask: int):
        self.ctx = ctx
        self.indices_bitmask = indices_bitmask
        self._indices = None
        self._column_indices = None
        self._cost = None

    @property
    def indices(self):
        if self._indices is None:
            self._indices = bit_indices(self.indices_bitmask)
        return self._indices

    @property
    def column_indices(self):
        if self._column_indices is None:
            # OR together the bitmask of every row in this cluster to find which
            # class2 columns carry a value for at least one of those rows.
            bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices))
            self._column_indices = bit_indices(bitmask)
        return self._column_indices

    @property
    def width(self):
        # Number of columns in each Class1Record: the class2 columns in use, plus one
        # because the class2 = 0 column must be encoded even though it carries no value.
        return len(self.column_indices) + 1

    @property
    def cost(self):
        if self._cost is None:
            # Estimated size in bytes of the PairPos format 2 subtable that would
            # encode this cluster, following the field layout of the GPOS spec.
            self._cost = (
                # 2 bytes for the offset to this subtable in the parent Lookup
                2
                # uint16 posFormat
                + 2
                # Offset16 coverageOffset, plus the Coverage table itself
                + 2
                + self.coverage_bytes
                # uint16 valueFormat1
                + 2
                # uint16 valueFormat2
                + 2
                # Offset16 classDef1Offset, plus the ClassDef1 table itself
                + 2
                + self.classDef1_bytes
                # Offset16 classDef2Offset, plus the ClassDef2 table itself
                + 2
                + self.classDef2_bytes
                # uint16 class1Count
                + 2
                # uint16 class2Count
                + 2
                # class1Records[class1Count]: one (Value1, Value2) pair per
                # (class1, class2) cell, i.e. rows * columns value records
                + (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes)
                * len(self.indices)
                * self.width
            )
        return self._cost

    @property
    def coverage_bytes(self):
        format1_bytes = (
            # Coverage format 1: 2 bytes for coverageFormat + 2 bytes for glyphCount
            4
            # + 2 bytes per glyph ID in the glyph array
            + sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2
        )
        # For format 2, estimate how many RangeRecords are needed once the per-class
        # glyph ID ranges are sorted and adjacent ranges are coalesced.
        ranges = sorted(
            chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices)
        )
        merged_range_count = 0
        last = None
        for start, end in ranges:
            if last is not None and start != last + 1:
                merged_range_count += 1
            last = end
        format2_bytes = (
            # Coverage format 2: 2 bytes for coverageFormat + 2 bytes for rangeCount
            4
            # + 6 bytes per RangeRecord (startGlyphID, endGlyphID, startCoverageIndex)
            + merged_range_count * 6
        )
        return min(format1_bytes, format2_bytes)

    @property
    def classDef1_bytes(self):
        # Glyphs absent from a ClassDef default to class 0, so the largest class1 in
        # the cluster does not need to be encoded explicitly: its glyphs can become the
        # implicit class 0. Only the remaining classes contribute to ClassDef1's size.
        biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i]))
        return _classDef_bytes(
            self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index]
        )

    @property
    def classDef2_bytes(self):
        # Every class2 column used by the cluster is counted; class 0 is left for the
        # glyphs that carry no pair value, so no class2 can be dropped.
        return _classDef_bytes(self.ctx.all_class2_data, self.column_indices)


def cluster_pairs_by_class2_coverage_custom_cost(
    font: TTFont,
    pairs: Pairs,
    compression: int = 5,
) -> List[Pairs]:
    if not pairs:
        # Nothing to split: return the empty mapping as the only group.
        return [pairs]

    # Sorted, deduplicated lists of the class1 and class2 glyph tuples seen in pairs.
    all_class1 = sorted(set(pair[0] for pair in pairs))
    all_class2 = sorted(set(pair[1] for pair in pairs))

    # One bitmask per class1 row: bit i is set when (class1, all_class2[i]) has a value.
    lines = [
        sum(
            1 << i if (class1, class2) in pairs else 0
            for i, class2 in enumerate(all_class2)
        )
        for class1 in all_class1
    ]

    # Pre-compute the glyph ID ranges of every class, used to estimate Coverage and
    # ClassDef sizes.
    name_to_id = font.getReverseGlyphMap()
    all_class1_data = [
        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class1
    ]
    all_class2_data = [
        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class2
    ]

    # The value formats (and hence the size of each value record) are shared by all
    # prospective subtables: OR together the effective formats of every pair value.
    format1 = 0
    format2 = 0
    for value in pairs.values():
        format1 |= value[0].getEffectiveFormat() if value[0] else 0
        format2 |= value[1].getEffectiveFormat() if value[1] else 0
    valueFormat1_bytes = bit_count(format1) * 2
    valueFormat2_bytes = bit_count(format2) * 2

    ctx = ClusteringContext(
        lines,
        all_class1,
        all_class1_data,
        all_class2_data,
        valueFormat1_bytes,
        valueFormat2_bytes,
    )

    cluster_cache: Dict[int, Cluster] = {}

    def make_cluster(indices: int) -> Cluster:
        cluster = cluster_cache.get(indices, None)
        if cluster is not None:
            return cluster
        cluster = Cluster(ctx, indices)
        cluster_cache[indices] = cluster
        return cluster

    def merge(cluster: Cluster, other: Cluster) -> Cluster:
        return make_cluster(cluster.indices_bitmask | other.indices_bitmask)

    # Agglomerative clustering by hand: start with each class1 row in its own cluster,
    # i.e. the most-split arrangement possible.
    clusters = [make_cluster(1 << i) for i in range(len(lines))]

    # Cost of the original, unsplit subtable: the cluster that contains every row.
    cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost
    log.debug(f" len(clusters) = {len(clusters)}")

    while len(clusters) > 1:
        # Find the pair of clusters whose merge changes the total estimated size the
        # least (ideally a negative change, i.e. an actual saving).
        lowest_cost_change = None
        best_cluster_index = None
        best_other_index = None
        best_merged = None
        for i, cluster in enumerate(clusters):
            for j, other in enumerate(clusters[i + 1 :]):
                merged = merge(cluster, other)
                cost_change = merged.cost - cluster.cost - other.cost
                if lowest_cost_change is None or cost_change < lowest_cost_change:
                    lowest_cost_change = cost_change
                    best_cluster_index = i
                    best_other_index = i + 1 + j
                    best_merged = merged
        assert lowest_cost_change is not None
        assert best_cluster_index is not None
        assert best_other_index is not None
        assert best_merged is not None

        # If even the cheapest merge would make the total bigger, the greedy split
        # cannot be improved by merging; decide whether to stop here or to keep
        # merging in order to limit how many new subtables get created.
        if lowest_cost_change > 0:
            cost_after_splitting = sum(c.cost for c in clusters)
            # Fraction of bytes saved by the current split compared to keeping one
            # single subtable.
            size_reduction = 1 - cost_after_splitting / cost_before_splitting

            # Heuristic budget: the bigger the saving, the more extra subtables we are
            # willing to create, scaled by the requested compression level. For example
            # a 50% reduction allows `compression` new subtables, a 75% reduction twice
            # that many.
            max_new_subtables = -log2(1 - size_reduction) * compression
            log.debug(
                f" len(clusters) = {len(clusters):3d} size_reduction={size_reduction:5.2f} max_new_subtables={max_new_subtables}",
            )
            if compression == 9:
                # Maximum compression: accept however many subtables the greedy split
                # produced.
                max_new_subtables = len(clusters)

            # Stop merging once the current number of clusters fits within the budget
            # (+1 because one subtable existed to begin with).
            if len(clusters) <= max_new_subtables + 1:
                break

        # Apply the best merge and continue.
        del clusters[best_other_index]
        clusters[best_cluster_index] = best_merged

    # Map the clusters back to dicts of pairs, one dict per new subtable.
    pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict)
    for pair, values in pairs.items():
        pairs_by_class1[pair[0]][pair] = values
    pairs_groups: List[Pairs] = []
    for cluster in clusters:
        pairs_group: Pairs = dict()
        for i in cluster.indices:
            class1 = all_class1[i]
            pairs_group.update(pairs_by_class1[class1])
        pairs_groups.append(pairs_group)
    return pairs_groups