1
//! Default / concrete implementations of the text3 trait abstractions.
2
//!
3
//! This module bridges the generic text3 layout engine and the concrete
4
//! `FontRef` / `ParsedFont` types.  It provides:
5
//!
6
//! - `ParsedFontTrait` implementation for `FontRef`
7
//! - Font loading via `PathLoader`
8
//! - The core `shape_text_internal` shaping function
9

            
10
use std::{path::Path, sync::Arc};
11

            
12
use allsorts::{
13
    gpos,
14
    gsub::{self, FeatureInfo, FeatureMask, Features},
15
};
16
use azul_core::geom::LogicalSize;
17
use azul_css::props::basic::FontRef;
18
use rust_fontconfig::FcFontCache;
19

            
20
use crate::{
21
    font::parsed::ParsedFont,
22
    text3::{
23
        cache::{
24
            BidiDirection, BidiLevel, FontManager, FontSelector, FontVariantCaps,
25
            FontVariantLigatures, FontVariantNumeric, Glyph, GlyphOrientation, GlyphSource,
26
            LayoutError, LayoutFontMetrics, ParsedFontTrait, Point, ShallowClone, StyleProperties,
27
            TextCombineUpright, TextDecoration, TextOrientation, VerticalMetrics, WritingMode,
28
        },
29
        script::Script,
30
    },
31
};
32

            
33
/// Creates a FontRef from font bytes by parsing them into a ParsedFont.
34
///
35
/// This is a bridge function that:
36
///
37
/// 1. Parses the bytes into a ParsedFont
38
/// 2. Wraps it in a FontRef with proper reference counting
39
///
40
/// # Arguments
41
///
42
/// - `font_bytes` - The raw font file data
43
/// - `font_index` - Index of the font in a font collection (0 for single fonts)
44
/// - `parse_outlines` - Whether to parse glyph outlines (expensive, usually false for layout)
45
pub fn font_ref_from_bytes(
46
    font_bytes: &[u8],
47
    font_index: usize,
48
    parse_outlines: bool,
49
) -> Option<FontRef> {
50
    // Parse the font bytes into ParsedFont
51
    let mut warnings = Vec::new();
52
    let parsed_font = ParsedFont::from_bytes(font_bytes, font_index, &mut warnings)?;
53

            
54
    Some(crate::parsed_font_to_font_ref(parsed_font))
55
}
56

            
57
/// A stateless font loader that produces [`FontRef`]s.
///
/// Fonts can be loaded either straight from a filesystem path
/// (`PathLoader::load_from_path`, which reads the file into memory itself)
/// or from font bytes that are already loaded and shared
/// (`PathLoader::load_font_shared`).
#[derive(Debug, Default, Clone)]
pub struct PathLoader;
63

            
64
impl PathLoader {
65
    /// Creates a new `PathLoader`.
66
4690
    pub fn new() -> Self {
67
4690
        PathLoader
68
4690
    }
69

            
70
    /// Read a font from disk and parse via the lazy-LocaGlyf path.
71
    /// Convenience wrapper for callers that have a path but no
72
    /// `Arc<FontBytes>` yet — uses a heap read (`Owned`) since a
73
    /// loose path won't go through the fontconfig dedup cache.
74
    pub fn load_from_path(&self, path: &Path, font_index: usize) -> Result<FontRef, LayoutError> {
75
        let font_bytes = std::fs::read(path).map_err(|_| {
76
            LayoutError::FontNotFound(FontSelector {
77
                family: path.to_string_lossy().into_owned(),
78
                weight: rust_fontconfig::FcWeight::Normal,
79
                style: crate::text3::cache::FontStyle::Normal,
80
                unicode_ranges: Vec::new(),
81
            })
82
        })?;
83
        let arc_owned = std::sync::Arc::<[u8]>::from(font_bytes);
84
        let bytes = std::sync::Arc::new(rust_fontconfig::FontBytes::Owned(arc_owned));
85
        self.load_font_shared(bytes, font_index)
86
    }
87

            
88
    /// Lazy-friendly loader: takes an `Arc<FontBytes>` (typically
89
    /// from [`rust_fontconfig::FcFontCache::get_font_bytes`]) and
90
    /// uses the [`ParsedFont::from_bytes_shared`] constructor so
91
    /// `LocaGlyf::load` is deferred until the first glyph decode.
92
    ///
93
    /// This is the only loader on the production path —
94
    /// `load_fonts_from_disk` calls this via the closure passed
95
    /// into `FontManager::load_missing_for_chains`. Fonts that
96
    /// never get rasterized (common — every face of a `.ttc` gets a
97
    /// FontId, but pages only hit a couple of them) skip their
98
    /// per-face loca+glyf materialisation entirely; with
99
    /// `FontBytes::Mmapped` the unread pages also never count
100
    /// toward RSS.
101
21175
    pub fn load_font_shared(
102
21175
        &self,
103
21175
        font_bytes: std::sync::Arc<rust_fontconfig::FontBytes>,
104
21175
        font_index: usize,
105
21175
    ) -> Result<FontRef, LayoutError> {
106
21175
        let mut warnings = Vec::new();
107
21175
        let parsed_font = ParsedFont::from_bytes_shared(font_bytes, font_index, &mut warnings)
108
21175
            .ok_or_else(|| {
109
                LayoutError::ShapingError("Failed to parse font with allsorts".to_string())
110
            })?;
111
21175
        Ok(crate::parsed_font_to_font_ref(parsed_font))
112
21175
    }
113
}
114

            
115
impl FontManager<FontRef> {
116
    pub fn new_with_fc_cache(fc_cache: FcFontCache) -> Result<Self, LayoutError> {
117
        FontManager::new(fc_cache)
118
    }
119

            
120
    /// Evict the cached `LocaGlyf` for every face that hasn't had a
121
    /// `get_or_decode_glyph` call within the last `idle` duration.
122
    /// Only `LocaGlyfState::Deferred` faces (the production lazy
123
    /// path) can be evicted — they keep their source `Arc<[u8]>` so
124
    /// the next glyph access re-parses cheaply. `LocaGlyfState::Loaded`
125
    /// faces from the eager path stay put.
126
    ///
127
    /// Returns the number of faces evicted. Embedders can call this
128
    /// from a memory-pressure hook or on a timer; servo-shot
129
    /// exposes it via `--azul-evict-after-each` for measurement.
130
    pub fn evict_unused(&self, idle: std::time::Duration) -> usize {
131
        use crate::font::parsed::ParsedFont;
132
        let parsed = match self.parsed_fonts.lock() {
133
            Ok(p) => p,
134
            Err(_) => return 0,
135
        };
136
        // We compare against the same monotonic clock the font's
137
        // `last_used` is sampled from. `last_used == 0` means
138
        // "never touched" -> eligible. Otherwise we only evict if
139
        // `now_nanos - last_used >= idle.as_nanos()`.
140
        let cutoff = idle.as_nanos() as u64;
141
        let now_nanos = crate::font::parsed::monotonic_now_nanos();
142
        let mut evicted = 0usize;
143
        for font_ref in parsed.values() {
144
            let font: &ParsedFont = get_parsed_font(font_ref);
145
            let last = font.last_used_nanos();
146
            // Untouched faces are eligible immediately. Touched
147
            // faces need to be `idle` past their last use.
148
            let stale = last == 0 || now_nanos.saturating_sub(last) >= cutoff;
149
            if stale && font.evict_loca_glyf() {
150
                evicted += 1;
151
            }
152
        }
153
        evicted
154
    }
155
}
156

            
157

            
158
// ParsedFontTrait Implementation for FontRef
159

            
160
// Implement ShallowClone for FontRef
161
impl crate::text3::cache::ShallowClone for FontRef {
162
108570
    fn shallow_clone(&self) -> Self {
163
        // FontRef::clone increments the reference count
164
108570
        self.clone()
165
108570
    }
166
}
167

            
168
// Helper to get the inner ParsedFont from FontRef
169
#[inline]
170
122675
fn get_parsed_font(font_ref: &FontRef) -> &ParsedFont {
171
122675
    unsafe { &*(font_ref.get_parsed() as *const ParsedFont) }
172
122675
}
173

            
174
impl ParsedFontTrait for FontRef {
175
    // +spec:block-formatting-context:21ec9a - bidi direction handled during text shaping for vertical writing modes
176
5495
    fn shape_text(
177
5495
        &self,
178
5495
        text: &str,
179
5495
        script: Script,
180
5495
        language: crate::text3::script::Language,
181
5495
        direction: BidiDirection,
182
5495
        style: &StyleProperties,
183
5495
    ) -> Result<Vec<Glyph>, LayoutError> {
184
        // Delegate to the inner ParsedFont's shape_text, passing self as font_ref
185
5495
        let parsed = get_parsed_font(self);
186
5495
        parsed.shape_text_for_font_ref(self, text, script, language, direction, style)
187
5495
    }
188

            
189
117180
    fn get_hash(&self) -> u64 {
190
117180
        get_parsed_font(self).hash
191
117180
    }
192

            
193
    fn get_glyph_size(&self, glyph_id: u16, font_size: f32) -> Option<LogicalSize> {
194
        get_parsed_font(self).get_glyph_size(glyph_id, font_size)
195
    }
196

            
197
    fn get_hyphen_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
198
        get_parsed_font(self).get_hyphen_glyph_and_advance(font_size)
199
    }
200

            
201
    fn get_kashida_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
202
        get_parsed_font(self).get_kashida_glyph_and_advance(font_size)
203
    }
204

            
205
    fn has_glyph(&self, codepoint: u32) -> bool {
206
        get_parsed_font(self).has_glyph(codepoint)
207
    }
208

            
209
    fn get_vertical_metrics(&self, glyph_id: u16) -> Option<VerticalMetrics> {
210
        get_parsed_font(self).get_vertical_metrics(glyph_id)
211
    }
212

            
213
    fn get_font_metrics(&self) -> LayoutFontMetrics {
214
        get_parsed_font(self).font_metrics.clone()
215
    }
216

            
217
    fn num_glyphs(&self) -> u16 {
218
        get_parsed_font(self).num_glyphs
219
    }
220

            
221
    fn get_space_width(&self) -> Option<usize> {
222
        get_parsed_font(self).get_space_width()
223
    }
224
}
225

            
226
/// Extension trait for FontRef to provide access to font bytes and metrics
227
///
228
/// This trait provides methods that require access to the inner ParsedFont data.
229
pub trait FontRefExt {
230
    /// Get the original font bytes. Returns an empty slice when the
231
    /// underlying `ParsedFont` was created without retaining its
232
    /// source bytes (the default since the lazy-font-loading refactor).
233
    /// Callers that need the bytes for PDF embedding must construct
234
    /// the `ParsedFont` via `ParsedFont::with_source_bytes`.
235
    fn get_bytes(&self) -> &[u8];
236
    /// Get the full font metrics (PDF-style metrics from HEAD, HHEA, OS/2 tables)
237
    fn get_full_font_metrics(&self) -> azul_css::props::basic::FontMetrics;
238
}
239

            
240
impl FontRefExt for FontRef {
241
    fn get_bytes(&self) -> &[u8] {
242
        get_parsed_font(self)
243
            .original_bytes
244
            .as_ref()
245
            .map(|b| b.as_slice())
246
            .unwrap_or(&[])
247
    }
248

            
249
    fn get_full_font_metrics(&self) -> azul_css::props::basic::FontMetrics {
250
        use azul_css::{OptionI16, OptionU16, OptionU32};
251

            
252
        let parsed = get_parsed_font(self);
253
        let pdf = &parsed.pdf_font_metrics;
254

            
255
        // PdfFontMetrics only has a subset of fields; fill others with defaults
256
        azul_css::props::basic::FontMetrics {
257
            // OS/2 version 1 fields (u32 - align 4, placed first)
258
            ul_code_page_range1: OptionU32::None,
259
            ul_code_page_range2: OptionU32::None,
260

            
261
            // OS/2 table (u32 fields)
262
            ul_unicode_range1: 0,   // Not in PdfFontMetrics
263
            ul_unicode_range2: 0,   // Not in PdfFontMetrics
264
            ul_unicode_range3: 0,   // Not in PdfFontMetrics
265
            ul_unicode_range4: 0,   // Not in PdfFontMetrics
266
            ach_vend_id: 0,         // Not in PdfFontMetrics
267

            
268
            // OS/2 version 0 fields (optional)
269
            s_typo_ascender: OptionI16::None,
270
            s_typo_descender: OptionI16::None,
271
            s_typo_line_gap: OptionI16::None,
272
            us_win_ascent: OptionU16::None,
273
            us_win_descent: OptionU16::None,
274

            
275
            // OS/2 version 2 fields (optional)
276
            sx_height: OptionI16::None,
277
            s_cap_height: OptionI16::None,
278
            us_default_char: OptionU16::None,
279
            us_break_char: OptionU16::None,
280
            us_max_context: OptionU16::None,
281

            
282
            // OS/2 version 3 fields (optional)
283
            us_lower_optical_point_size: OptionU16::None,
284
            us_upper_optical_point_size: OptionU16::None,
285

            
286
            // HEAD table fields
287
            units_per_em: pdf.units_per_em,
288
            font_flags: pdf.font_flags,
289
            x_min: pdf.x_min,
290
            y_min: pdf.y_min,
291
            x_max: pdf.x_max,
292
            y_max: pdf.y_max,
293

            
294
            // HHEA table fields
295
            ascender: pdf.ascender,
296
            descender: pdf.descender,
297
            line_gap: pdf.line_gap,
298
            advance_width_max: pdf.advance_width_max,
299
            min_left_side_bearing: 0,  // Not in PdfFontMetrics
300
            min_right_side_bearing: 0, // Not in PdfFontMetrics
301
            x_max_extent: 0,           // Not in PdfFontMetrics
302
            caret_slope_rise: pdf.caret_slope_rise,
303
            caret_slope_run: pdf.caret_slope_run,
304
            caret_offset: 0,  // Not in PdfFontMetrics
305
            num_h_metrics: 0, // Not in PdfFontMetrics
306

            
307
            // OS/2 table fields
308
            x_avg_char_width: pdf.x_avg_char_width,
309
            us_weight_class: pdf.us_weight_class,
310
            us_width_class: pdf.us_width_class,
311
            fs_type: 0,                // Not in PdfFontMetrics
312
            y_subscript_x_size: 0,     // Not in PdfFontMetrics
313
            y_subscript_y_size: 0,     // Not in PdfFontMetrics
314
            y_subscript_x_offset: 0,   // Not in PdfFontMetrics
315
            y_subscript_y_offset: 0,   // Not in PdfFontMetrics
316
            y_superscript_x_size: 0,   // Not in PdfFontMetrics
317
            y_superscript_y_size: 0,   // Not in PdfFontMetrics
318
            y_superscript_x_offset: 0, // Not in PdfFontMetrics
319
            y_superscript_y_offset: 0, // Not in PdfFontMetrics
320
            y_strikeout_size: pdf.y_strikeout_size,
321
            y_strikeout_position: pdf.y_strikeout_position,
322
            s_family_class: 0, // Not in PdfFontMetrics
323
            fs_selection: 0,        // Not in PdfFontMetrics
324
            us_first_char_index: 0, // Not in PdfFontMetrics
325
            us_last_char_index: 0,  // Not in PdfFontMetrics
326

            
327
            // Panose (align 1 - last)
328
            panose: azul_css::props::basic::Panose::zero(),
329
        }
330
    }
331
}
332

            
333
// ParsedFont helper method for FontRef
334
//
335
// This allows ParsedFont to create glyphs that use FontRef
336
//
337
// FontRef is just a C-style Arc wrapper around ParsedFont, so we delegate to
338
// the common shaping implementation and convert the font reference type.
339

            
340
impl ParsedFont {
341
    /// Internal helper that shapes text and returns Glyph
342
    /// Delegates to shape_text_internal and converts the font reference.
343
5495
    fn shape_text_for_font_ref(
344
5495
        &self,
345
5495
        font_ref: &FontRef,
346
5495
        text: &str,
347
5495
        script: Script,
348
5495
        language: crate::text3::script::Language,
349
5495
        direction: BidiDirection,
350
5495
        style: &StyleProperties,
351
5495
    ) -> Result<Vec<Glyph>, LayoutError> {
352
        // Use the common shaping implementation
353
5495
        let shaped = shape_text_internal(self, text, script, language, direction, style)?;
354

            
355
        // Convert Glyph - now using font_hash and font_metrics instead of font reference
356
5495
        let font_hash = font_ref.get_hash();
357
5495
        let font_metrics = LayoutFontMetrics {
358
5495
            ascent: self.font_metrics.ascent,
359
5495
            descent: self.font_metrics.descent,
360
5495
            line_gap: self.font_metrics.line_gap,
361
5495
            units_per_em: self.font_metrics.units_per_em,
362
5495
            x_height: self.font_metrics.x_height,
363
5495
            cap_height: self.font_metrics.cap_height,
364
5495
        };
365

            
366
5495
        Ok(shaped
367
5495
            .into_iter()
368
5495
            .map(|g| Glyph {
369
60585
                glyph_id: g.glyph_id,
370
60585
                codepoint: g.codepoint,
371
60585
                font_hash,
372
60585
                font_metrics: font_metrics.clone(),
373
60585
                style: g.style,
374
60585
                source: g.source,
375
60585
                logical_byte_index: g.logical_byte_index,
376
60585
                logical_byte_len: g.logical_byte_len,
377
60585
                content_index: g.content_index,
378
60585
                cluster: g.cluster,
379
60585
                advance: g.advance,
380
60585
                kerning: g.kerning,
381
60585
                offset: g.offset,
382
60585
                vertical_advance: g.vertical_advance,
383
60585
                vertical_origin_y: g.vertical_origin_y,
384
60585
                vertical_bearing: g.vertical_bearing,
385
60585
                orientation: g.orientation,
386
60585
                script: g.script,
387
60585
                bidi_level: g.bidi_level,
388
60585
            })
389
5495
            .collect())
390
5495
    }
391

            
392
5495
    fn get_hash(&self) -> u64 {
393
5495
        self.hash
394
5495
    }
395

            
396
    fn get_glyph_size(&self, glyph_id: u16, font_size_px: f32) -> Option<LogicalSize> {
397
        self.get_or_decode_glyph(glyph_id).map(|record| {
398
            let units_per_em = self.font_metrics.units_per_em as f32;
399
            let scale_factor = if units_per_em > 0.0 {
400
                font_size_px / units_per_em
401
            } else {
402
                FALLBACK_SCALE
403
            };
404

            
405
            // max_x, max_y, min_x, min_y in font units
406
            let bbox = &record.bounding_box;
407

            
408
            LogicalSize {
409
                width: (bbox.max_x - bbox.min_x) as f32 * scale_factor,
410
                height: (bbox.max_y - bbox.min_y) as f32 * scale_factor,
411
            }
412
        })
413
    }
414

            
415
    fn get_hyphen_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
416
        let glyph_id = self.lookup_glyph_index('-' as u32)?;
417
        let advance_units = self.get_horizontal_advance(glyph_id);
418
        let scale_factor = if self.font_metrics.units_per_em > 0 {
419
            font_size / (self.font_metrics.units_per_em as f32)
420
        } else {
421
            return None;
422
        };
423
        let scaled_advance = advance_units as f32 * scale_factor;
424
        Some((glyph_id, scaled_advance))
425
    }
426

            
427
    fn get_kashida_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
428
        // U+0640 is the Arabic Tatweel character, used for kashida justification.
429
        let glyph_id = self.lookup_glyph_index('\u{0640}' as u32)?;
430
        let advance_units = self.get_horizontal_advance(glyph_id);
431
        let scale_factor = if self.font_metrics.units_per_em > 0 {
432
            font_size / (self.font_metrics.units_per_em as f32)
433
        } else {
434
            return None;
435
        };
436
        let scaled_advance = advance_units as f32 * scale_factor;
437
        Some((glyph_id, scaled_advance))
438
    }
439
}
440

            
441
/// Fallback scale factor when `units_per_em` is zero (corrupt/broken font).
442
const FALLBACK_SCALE: f32 = 0.01; // used by `ParsedFont::get_glyph_size` in place of `font_size_px / units_per_em`
443

            
444
// Helper Functions
445

            
446
/// Builds a FeatureMask with the appropriate OpenType features for a given script.
447
/// This ensures proper text shaping for complex scripts like Arabic, Devanagari, etc.
448
///
449
/// The function includes:
450
/// - Common features for all scripts (ligatures, contextual alternates, etc.)
451
/// - Script-specific features (positional forms for Arabic, conjuncts for Indic, etc.)
452
///
453
/// This is designed to be stable and explicit - we control exactly which features
454
/// are enabled rather than relying on allsorts' defaults which may change.
455
5495
fn build_feature_mask_for_script(script: Script) -> FeatureMask {
456
    use Script::*;
457

            
458
    // Start with common features that apply to most scripts
459
5495
    let mut mask = FeatureMask::default(); // Includes: CALT, CCMP, CLIG, LIGA, LOCL, RLIG
460

            
461
    // Add script-specific features
462
5495
    match script {
463
        // Arabic and related scripts - require positional forms
464
        Arabic => {
465
            mask |= FeatureMask::INIT; // Initial forms (at start of word)
466
            mask |= FeatureMask::MEDI; // Medial forms (middle of word)
467
            mask |= FeatureMask::FINA; // Final forms (end of word)
468
            mask |= FeatureMask::ISOL; // Isolated forms (standalone)
469
                                       // Note: RLIG (required ligatures) already in default for
470
                                       // lam-alef ligatures
471
        }
472

            
473
        // Indic scripts - require complex conjunct formation and reordering
474
        Devanagari | Bengali | Gujarati | Gurmukhi | Kannada | Malayalam | Oriya | Tamil
475
        | Telugu => {
476
            mask |= FeatureMask::NUKT; // Nukta forms
477
            mask |= FeatureMask::AKHN; // Akhand ligatures
478
            mask |= FeatureMask::RPHF; // Reph form
479
            mask |= FeatureMask::RKRF; // Rakar form
480
            mask |= FeatureMask::PREF; // Pre-base forms
481
            mask |= FeatureMask::BLWF; // Below-base forms
482
            mask |= FeatureMask::ABVF; // Above-base forms
483
            mask |= FeatureMask::HALF; // Half forms
484
            mask |= FeatureMask::PSTF; // Post-base forms
485
            mask |= FeatureMask::VATU; // Vattu variants
486
            mask |= FeatureMask::CJCT; // Conjunct forms
487
        }
488

            
489
        // Myanmar (Burmese) - has complex reordering
490
        Myanmar => {
491
            mask |= FeatureMask::PREF; // Pre-base forms
492
            mask |= FeatureMask::BLWF; // Below-base forms
493
            mask |= FeatureMask::PSTF; // Post-base forms
494
        }
495

            
496
        // Khmer - has complex reordering and stacking
497
        Khmer => {
498
            mask |= FeatureMask::PREF; // Pre-base forms
499
            mask |= FeatureMask::BLWF; // Below-base forms
500
            mask |= FeatureMask::ABVF; // Above-base forms
501
            mask |= FeatureMask::PSTF; // Post-base forms
502
        }
503

            
504
        // Thai - has tone marks and vowel reordering
505
        Thai => {
506
            // Thai mostly uses default features, but may have some special marks
507
            // The default mask is sufficient for most Thai fonts
508
        }
509

            
510
        // Hebrew - may have contextual forms but less complex than Arabic
511
        Hebrew => {
512
            // Hebrew fonts may use contextual alternates already in default
513
            // Some fonts have special features but they're rare
514
        }
515

            
516
        // Hangul (Korean) - has complex syllable composition
517
        Hangul => {
518
            // Note: Hangul jamo features (LJMO, VJMO, TJMO) are not available in allsorts'
519
            // FeatureMask Most modern Hangul fonts work correctly with the default
520
            // features as syllable composition is usually handled at a lower level
521
        }
522

            
523
        // Ethiopic - has syllabic script with some ligatures
524
        Ethiopic => {
525
            // Default features are usually sufficient
526
            // LIGA and CLIG already in default mask
527
        }
528

            
529
        // Latin, Greek, Cyrillic - standard features are sufficient
530
5495
        Latin | Greek | Cyrillic => {
531
5495
            // Default mask includes all needed features:
532
5495
            // - LIGA: standard ligatures (fi, fl, etc.)
533
5495
            // - CLIG: contextual ligatures
534
5495
            // - CALT: contextual alternates
535
5495
            // - CCMP: mark composition
536
5495
        }
537

            
538
        // Georgian - uses standard features
539
        Georgian => {
540
            // Default features sufficient
541
        }
542

            
543
        // CJK scripts (Hiragana, Katakana, Mandarin/Hani)
544
        Hiragana | Katakana | Mandarin => {
545
            // CJK fonts may use vertical alternates, but those are controlled
546
            // by writing-mode, not GSUB features in the horizontal direction.
547
            // Default features are sufficient.
548
        }
549

            
550
        // Sinhala - Indic-derived but simpler
551
        Sinhala => {
552
            mask |= FeatureMask::AKHN; // Akhand ligatures
553
            mask |= FeatureMask::RPHF; // Reph form
554
            mask |= FeatureMask::VATU; // Vattu variants
555
        }
556
    }
557

            
558
5495
    mask
559
5495
}
560

            
561
/// Maps the layout engine's `Script` enum to an OpenType script tag `u32`.
562
5495
fn to_opentype_script_tag(script: Script) -> u32 {
563
    use Script::*;
564
    // Tags from https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
565
5495
    match script {
566
        Arabic => u32::from_be_bytes(*b"arab"),
567
        Bengali => u32::from_be_bytes(*b"beng"),
568
        Cyrillic => u32::from_be_bytes(*b"cyrl"),
569
        Devanagari => u32::from_be_bytes(*b"deva"),
570
        Ethiopic => u32::from_be_bytes(*b"ethi"),
571
        Georgian => u32::from_be_bytes(*b"geor"),
572
        Greek => u32::from_be_bytes(*b"grek"),
573
        Gujarati => u32::from_be_bytes(*b"gujr"),
574
        Gurmukhi => u32::from_be_bytes(*b"guru"),
575
        Hangul => u32::from_be_bytes(*b"hang"),
576
        Hebrew => u32::from_be_bytes(*b"hebr"),
577
        // OpenType does not define a separate Hiragana script tag;
578
        // both Hiragana and Katakana intentionally use "kana".
579
        Hiragana => u32::from_be_bytes(*b"kana"),
580
        Kannada => u32::from_be_bytes(*b"knda"),
581
        Katakana => u32::from_be_bytes(*b"kana"),
582
        Khmer => u32::from_be_bytes(*b"khmr"),
583
5495
        Latin => u32::from_be_bytes(*b"latn"),
584
        Malayalam => u32::from_be_bytes(*b"mlym"),
585
        Mandarin => u32::from_be_bytes(*b"hani"),
586
        Myanmar => u32::from_be_bytes(*b"mymr"),
587
        Oriya => u32::from_be_bytes(*b"orya"),
588
        Sinhala => u32::from_be_bytes(*b"sinh"),
589
        Tamil => u32::from_be_bytes(*b"taml"),
590
        Telugu => u32::from_be_bytes(*b"telu"),
591
        Thai => u32::from_be_bytes(*b"thai"),
592
    }
593
5495
}
594

            
595
/// Parses a font-feature string such as `"liga"`, `"liga=0"`, or `"ss01"`
/// into an OpenType feature tag and a value.
///
/// Format: `TAG` or `TAG=VALUE`. Surrounding whitespace around both parts is
/// ignored, and anything after a second `=` is ignored.
///
/// - `TAG` must be 1 to 4 bytes, each in the OpenType tag range
///   `0x20..=0x7E` (printable ASCII). Tags shorter than four bytes are padded
///   on the right with spaces. The tag is packed big-endian into a `u32`.
/// - `VALUE` must parse as a `u32`; it defaults to `1` (on) when omitted.
///
/// Returns `None` for an empty tag, a tag that is too long or contains
/// bytes outside the allowed range, or a value that is not a valid `u32`.
fn parse_font_feature(feature_str: &str) -> Option<(u32, u32)> {
    let mut parts = feature_str.split('=');
    let tag_str = parts.next()?.trim();
    let value_str = parts.next().unwrap_or("1").trim(); // Default to 1 (on) if no value

    let tag_bytes = tag_str.as_bytes();

    // An empty tag would otherwise be padded into a bogus all-spaces tag.
    if tag_bytes.is_empty() || tag_bytes.len() > 4 {
        return None;
    }
    // OpenType tags consist of bytes 0x20..=0x7E only; this also rejects
    // non-ASCII input and embedded control characters.
    if !tag_bytes.iter().all(|b| (0x20..=0x7E).contains(b)) {
        return None;
    }

    // Right-pad with spaces to the fixed four-byte tag width.
    let mut padded = [b' '; 4];
    padded[..tag_bytes.len()].copy_from_slice(tag_bytes);

    let value = value_str.parse::<u32>().ok()?;

    Some((u32::from_be_bytes(padded), value))
}
614

            
615
/// A helper to add OpenType features based on CSS `font-variant-*` properties.
616
5495
fn add_variant_features(style: &StyleProperties, features: &mut Vec<FeatureInfo>) {
617
    // Helper to add a feature that is simply "on".
618
5495
    let mut add_on = |tag_str: &[u8; 4]| {
619
        features.push(FeatureInfo {
620
            feature_tag: u32::from_be_bytes(*tag_str),
621
            alternate: None,
622
        });
623
    };
624

            
625
    // Note on disabling features: The CSS properties `font-variant-ligatures: none` or
626
    // `no-common-ligatures` are meant to disable features that may be on by default for a
627
    // given script. The `allsorts` API for applying custom features is additive and does not
628
    // currently support disabling default features. This implementation only handles enabling
629
    // non-default features.
630

            
631
    // Ligatures
632
5495
    match style.font_variant_ligatures {
633
        FontVariantLigatures::Discretionary => add_on(b"dlig"),
634
        FontVariantLigatures::Historical => add_on(b"hlig"),
635
        FontVariantLigatures::Contextual => add_on(b"calt"),
636
5495
        _ => {} // Other cases are either default-on or require disabling.
637
    }
638

            
639
    // Caps
640
5495
    match style.font_variant_caps {
641
        FontVariantCaps::SmallCaps => add_on(b"smcp"),
642
        FontVariantCaps::AllSmallCaps => {
643
            add_on(b"c2sc");
644
            add_on(b"smcp");
645
        }
646
        FontVariantCaps::PetiteCaps => add_on(b"pcap"),
647
        FontVariantCaps::AllPetiteCaps => {
648
            add_on(b"c2pc");
649
            add_on(b"pcap");
650
        }
651
        FontVariantCaps::Unicase => add_on(b"unic"),
652
        FontVariantCaps::TitlingCaps => add_on(b"titl"),
653
5495
        FontVariantCaps::Normal => {}
654
    }
655

            
656
    // Numeric
657
5495
    match style.font_variant_numeric {
658
        FontVariantNumeric::LiningNums => add_on(b"lnum"),
659
        FontVariantNumeric::OldstyleNums => add_on(b"onum"),
660
        FontVariantNumeric::ProportionalNums => add_on(b"pnum"),
661
        FontVariantNumeric::TabularNums => add_on(b"tnum"),
662
        FontVariantNumeric::DiagonalFractions => add_on(b"frac"),
663
        FontVariantNumeric::StackedFractions => add_on(b"afrc"),
664
        FontVariantNumeric::Ordinal => add_on(b"ordn"),
665
        FontVariantNumeric::SlashedZero => add_on(b"zero"),
666
5495
        FontVariantNumeric::Normal => {}
667
    }
668
5495
}
669

            
670
/// Maps a `hyphenation::Language` to an OpenType language system tag.
///
/// The tag is returned as a `u32` built big-endian from the four ASCII tag
/// bytes (three letters padded with a trailing space), which is the form the
/// shaping calls in this file pass around as `lang_tag`.
///
/// OpenType language system tags are *not* ISO 639 codes: for example Welsh is
/// `WEL ` (ISO `cym`), Turkmen is `TKM ` (ISO `tuk`) and Esperanto is `NTO `
/// (ISO `epo`). A tag that a font does not list simply fails to select a
/// language-specific feature set, so a wrong tag silently loses localized
/// shaping, and a colliding tag (e.g. Esperanto mapped to the Spanish `ESP `)
/// selects another language's features.
#[cfg(feature = "text_layout_hyphenation")]
fn to_opentype_lang_tag(lang: hyphenation::Language) -> u32 {
    use hyphenation::Language::*;
    // A complete list of language tags can be found at:
    // https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
    let tag_bytes: [u8; 4] = match lang {
        Afrikaans => *b"AFK ",
        Albanian => *b"SQI ",
        Armenian => *b"HYE ",
        Assamese => *b"ASM ",
        Basque => *b"EUQ ",
        Belarusian => *b"BEL ",
        Bengali => *b"BEN ",
        Bulgarian => *b"BGR ",
        Catalan => *b"CAT ",
        Chinese => *b"ZHS ",
        Coptic => *b"COP ",
        Croatian => *b"HRV ",
        Czech => *b"CSY ",
        Danish => *b"DAN ",
        Dutch => *b"NLD ",
        // The registry has a single English tag; `ENU ` is not a registered tag.
        EnglishGB => *b"ENG ",
        EnglishUS => *b"ENG ",
        // `ESP ` is Spanish; Esperanto has its own tag.
        Esperanto => *b"NTO ",
        Estonian => *b"ETI ",
        // NOTE(review): `ETH ` does not appear to be a registered language tag
        // (the pattern set covers several Ethiopic-script languages) - confirm.
        Ethiopic => *b"ETH ",
        Finnish => *b"FIN ",
        FinnishScholastic => *b"FIN ",
        French => *b"FRA ",
        Friulan => *b"FRL ",
        Galician => *b"GAL ",
        Georgian => *b"KAT ",
        German1901 => *b"DEU ",
        German1996 => *b"DEU ",
        // NOTE(review): `DES ` does not appear to be a registered language tag;
        // `DEU ` is probably intended - confirm before changing.
        GermanSwiss => *b"DES ",
        // Ancient and polytonic Greek both use the "Polytonic Greek" tag.
        GreekAncient => *b"PGR ",
        GreekMono => *b"ELL ",
        GreekPoly => *b"PGR ",
        Gujarati => *b"GUJ ",
        Hindi => *b"HIN ",
        Hungarian => *b"HUN ",
        Icelandic => *b"ISL ",
        Indonesian => *b"IND ",
        Interlingua => *b"INA ",
        Irish => *b"IRI ",
        Italian => *b"ITA ",
        Kannada => *b"KAN ",
        Kurmanji => *b"KUR ",
        Latin => *b"LAT ",
        LatinClassic => *b"LAT ",
        LatinLiturgical => *b"LAT ",
        Latvian => *b"LVI ",
        Lithuanian => *b"LTH ",
        Macedonian => *b"MKD ",
        Malayalam => *b"MAL ",
        Marathi => *b"MAR ",
        Mongolian => *b"MNG ",
        NorwegianBokmal => *b"NOR ",
        NorwegianNynorsk => *b"NYN ",
        Occitan => *b"OCI ",
        Oriya => *b"ORI ",
        Pali => *b"PAL ",
        Panjabi => *b"PAN ",
        Piedmontese => *b"PMS ",
        Polish => *b"PLK ",
        Portuguese => *b"PTG ",
        Romanian => *b"ROM ",
        Romansh => *b"RMS ",
        Russian => *b"RUS ",
        Sanskrit => *b"SAN ",
        SerbianCyrillic => *b"SRB ",
        // NOTE(review): `SHC ` / `SHL ` do not appear to be registered language
        // tags - confirm which registered tag (if any) should be used instead.
        SerbocroatianCyrillic => *b"SHC ",
        SerbocroatianLatin => *b"SHL ",
        SlavonicChurch => *b"CSL ",
        Slovak => *b"SKY ",
        Slovenian => *b"SLV ",
        Spanish => *b"ESP ",
        Swedish => *b"SVE ",
        Tamil => *b"TAM ",
        Telugu => *b"TEL ",
        Thai => *b"THA ",
        Turkish => *b"TRK ",
        Turkmen => *b"TKM ",
        Ukrainian => *b"UKR ",
        Uppersorbian => *b"USB ",
        Welsh => *b"WEL ",
    };
    u32::from_be_bytes(tag_bytes)
}
760

            
761
/// Internal shaping implementation - the single source of truth for text shaping.
762
/// Both FontRef and ParsedFont use this function.
763
5495
fn shape_text_internal(
764
5495
    parsed_font: &ParsedFont,
765
5495
    text: &str,
766
5495
    script: Script,
767
5495
    language: crate::text3::script::Language,
768
5495
    direction: BidiDirection,
769
5495
    style: &StyleProperties,
770
5495
) -> Result<Vec<Glyph>, LayoutError> {
771
5495
    let script_tag = to_opentype_script_tag(script);
772
    #[cfg(feature = "text_layout_hyphenation")]
773
5495
    let lang_tag = to_opentype_lang_tag(language);
774
    #[cfg(not(feature = "text_layout_hyphenation"))]
775
    let lang_tag = 0u32;
776

            
777
    // +spec:text-alignment-spacing:4357e6 - non-zero letter-spacing should disable optional ligatures; allsorts API is additive-only so default liga cannot be disabled here
778
    // +spec:text-alignment-spacing:24d624 - cursive script letter-spacing behavior is advisory (outside CSS scope per spec note)
779
5495
    let mut user_features: Vec<FeatureInfo> = style
780
5495
        .font_features
781
5495
        .iter()
782
5495
        .filter_map(|s| parse_font_feature(s))
783
5495
        .map(|(tag, value)| FeatureInfo {
784
            feature_tag: tag,
785
            alternate: if value > 1 {
786
                Some(value as usize)
787
            } else {
788
                None
789
            },
790
        })
791
5495
        .collect();
792
5495
    add_variant_features(style, &mut user_features);
793

            
794
5495
    let opt_gdef = parsed_font.opt_gdef_table.as_ref().map(|v| &**v);
795

            
796
5495
    let mut raw_glyphs: Vec<allsorts::gsub::RawGlyph<()>> = text
797
5495
        .char_indices()
798
61040
        .filter_map(|(cluster, ch)| {
799
61040
            let glyph_index = parsed_font.lookup_glyph_index(ch as u32).unwrap_or(0);
800
61040
            if cluster > u16::MAX as usize {
801
                None
802
            } else {
803
                Some(allsorts::gsub::RawGlyph {
804
61040
                    unicodes: tinyvec::tiny_vec![[char; 1] => ch],
805
61040
                    glyph_index,
806
61040
                    liga_component_pos: cluster as u16,
807
61040
                    glyph_origin: allsorts::gsub::GlyphOrigin::Char(ch),
808
61040
                    flags: allsorts::gsub::RawGlyphFlags::empty(),
809
61040
                    extra_data: (),
810
61040
                    variation: None,
811
                })
812
            }
813
61040
        })
814
5495
        .collect();
815

            
816
5495
    if let Some(gsub) = parsed_font.gsub() {
817
5495
        let features = if user_features.is_empty() {
818
5495
            Features::Mask(build_feature_mask_for_script(script))
819
        } else {
820
            Features::Custom(user_features.clone())
821
        };
822

            
823
5495
        let dotted_circle_index = parsed_font
824
5495
            .lookup_glyph_index(allsorts::DOTTED_CIRCLE as u32)
825
5495
            .unwrap_or(0);
826
5495
        gsub::apply(
827
5495
            dotted_circle_index,
828
5495
            gsub,
829
5495
            opt_gdef,
830
5495
            script_tag,
831
5495
            Some(lang_tag),
832
5495
            &features,
833
5495
            None,
834
5495
            parsed_font.num_glyphs(),
835
5495
            &mut raw_glyphs,
836
        )
837
5495
        .map_err(|e| LayoutError::ShapingError(e.to_string()))?;
838
    }
839

            
840
5495
    let mut infos = gpos::Info::init_from_glyphs(opt_gdef, raw_glyphs);
841

            
842
5495
    if let Some(gpos) = parsed_font.gpos() {
843
5495
        let kern_table = parsed_font
844
5495
            .opt_kern_table
845
5495
            .as_ref()
846
5495
            .map(|kt| kt.as_borrowed());
847
5495
        let apply_kerning = true; // Always enable GPOS kern feature (not just when legacy kern table exists)
848
5495
        gpos::apply(
849
5495
            gpos,
850
5495
            opt_gdef,
851
5495
            kern_table,
852
5495
            apply_kerning,
853
5495
            &Features::Custom(user_features),
854
5495
            None,
855
5495
            script_tag,
856
5495
            Some(lang_tag),
857
5495
            &mut infos,
858
        )
859
5495
        .map_err(|e| LayoutError::ShapingError(e.to_string()))?;
860
    }
861

            
862
5495
    let font_size = style.font_size_px;
863
5495
    let scale_factor = if parsed_font.font_metrics.units_per_em > 0 {
864
5495
        font_size / (parsed_font.font_metrics.units_per_em as f32)
865
    } else {
866
        FALLBACK_SCALE
867
    };
868

            
869
5495
    let font_hash = parsed_font.get_hash();
870
5495
    let font_metrics = LayoutFontMetrics {
871
5495
        ascent: parsed_font.font_metrics.ascent,
872
5495
        descent: parsed_font.font_metrics.descent,
873
5495
        line_gap: parsed_font.font_metrics.line_gap,
874
5495
        units_per_em: parsed_font.font_metrics.units_per_em,
875
5495
        x_height: parsed_font.font_metrics.x_height,
876
5495
        cap_height: parsed_font.font_metrics.cap_height,
877
5495
    };
878
5495
    let style_arc = Arc::new(style.clone());
879
5495
    let bidi_level = BidiLevel::new(if direction.is_rtl() { 1 } else { 0 });
880

            
881
5495
    let mut shaped_glyphs = Vec::new();
882
60585
    for info in infos.iter() {
883
60585
        let cluster = info.glyph.liga_component_pos as u32;
884
60585
        let source_char = text
885
60585
            .get(cluster as usize..)
886
60585
            .and_then(|s| s.chars().next())
887
60585
            .unwrap_or('\u{FFFD}');
888

            
889
60585
        let base_advance = parsed_font.get_horizontal_advance(info.glyph.glyph_index);
890
        // Use hinted advance width when available (matches FreeType/Chrome behavior)
891
60585
        let ppem = font_size.round() as u16;
892
60585
        let advance = parsed_font
893
60585
            .get_hinted_advance_px(info.glyph.glyph_index, ppem)
894
60585
            .unwrap_or(base_advance as f32 * scale_factor);
895
60585
        let kerning = info.kerning as f32 * scale_factor;
896

            
897
60585
        let (offset_x_units, offset_y_units) =
898
60585
            if let allsorts::gpos::Placement::Distance(x, y) = info.placement {
899
                (x, y)
900
            } else {
901
60585
                (0, 0)
902
            };
903
60585
        let offset_x = offset_x_units as f32 * scale_factor;
904
60585
        let offset_y = offset_y_units as f32 * scale_factor;
905

            
906
60585
        let vert = parsed_font.get_vertical_metrics(info.glyph.glyph_index);
907
60585
        let glyph = Glyph {
908
60585
            glyph_id: info.glyph.glyph_index,
909
60585
            codepoint: source_char,
910
60585
            font_hash,
911
60585
            font_metrics: font_metrics.clone(),
912
60585
            style: Arc::clone(&style_arc),
913
60585
            source: GlyphSource::Char,
914
60585
            logical_byte_index: cluster as usize,
915
60585
            logical_byte_len: source_char.len_utf8(),
916
            content_index: 0,
917
60585
            cluster,
918
60585
            advance,
919
60585
            kerning,
920
60585
            offset: Point {
921
60585
                x: offset_x,
922
60585
                y: offset_y,
923
60585
            },
924
60585
            vertical_advance: vert.as_ref().map(|v| v.advance * font_size).unwrap_or(0.0),
925
60585
            vertical_origin_y: vert.as_ref().map(|v| v.origin_y * font_size).unwrap_or(0.0),
926
60585
            vertical_bearing: vert
927
60585
                .map(|v| Point { x: v.bearing_x * font_size, y: v.bearing_y * font_size })
928
60585
                .unwrap_or(Point { x: 0.0, y: 0.0 }),
929
60585
            orientation: GlyphOrientation::Horizontal,
930
60585
            script,
931
60585
            bidi_level,
932
        };
933
60585
        shaped_glyphs.push(glyph);
934
    }
935

            
936
5495
    Ok(shaped_glyphs)
937
5495
}
938

            
939
/// Public helper function to shape text for ParsedFont, returning Glyph
940
/// This is used by the ParsedFontTrait implementation for ParsedFont
941
pub fn shape_text_for_parsed_font(
942
    parsed_font: &ParsedFont,
943
    text: &str,
944
    script: Script,
945
    language: crate::text3::script::Language,
946
    direction: BidiDirection,
947
    style: &StyleProperties,
948
) -> Result<Vec<Glyph>, LayoutError> {
949
    // Delegate to the single internal implementation
950
    shape_text_internal(parsed_font, text, script, language, direction, style)
951
}