//! Core FFI-safe types used across crate boundaries.
//!
//! This module defines the fundamental types for FFI interop: [`AzString`] (an FFI-safe
//! string backed by [`U8Vec`] with destructor-based memory management), [`EmptyStruct`] (a
//! non-zero-size unit type), and various `Vec`/`Option` wrappers generated by the
//! `impl_vec!` and `impl_option!` macros.

            
8
use alloc::{
9
    string::{String, ToString},
10
    vec::Vec,
11
};
12

            
13
use crate::props::basic::ColorU;
14

            
15
// ============================================================================
// EmptyStruct type - FFI-safe replacement for ()
// ============================================================================
18

            
19
/// FFI-safe stand-in for the unit type `()`, intended for use in `Result` types.
///
/// `()` is zero-sized and therefore not FFI-safe. This struct carries a
/// single reserved byte, giving it a minimal 1-byte representation that can
/// be passed across the C ABI boundary.
///
/// # Usage
/// Write `Result<EmptyStruct, Error>` wherever `Result<(), Error>` would
/// otherwise be used.
///
/// # Example
/// ```ignore
/// fn do_something() -> Result<EmptyStruct, MyError> {
///     // ... do work ...
///     Ok(EmptyStruct::default())
/// }
/// ```
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct EmptyStruct {
    /// Padding byte whose only purpose is to give the struct a non-zero
    /// size. Always initialized to 0.
    pub _reserved: u8,
}
43

            
44

            
45
impl EmptyStruct {
46
    /// Create a new EmptyStruct value (equivalent to `()`)
47
    #[must_use]
48
    pub const fn new() -> Self {
49
        Self { _reserved: 0 }
50
    }
51
}
52

            
53
/// Lifts the unit value into its FFI-safe representation.
impl From<()> for EmptyStruct {
    fn from(_unit: ()) -> Self {
        // The default value has the reserved byte zeroed.
        <EmptyStruct as Default>::default()
    }
}
58

            
59
/// Discards the FFI placeholder and yields the plain unit value.
impl From<EmptyStruct> for () {
    fn from(_placeholder: EmptyStruct) -> Self {
        // Nothing to extract: the reserved byte carries no information.
    }
}
64

            
65
// ============================================================================
// Debug message types
// ============================================================================
68

            
69
/// Severity or category tag attached to a layout debug message.
///
/// The first three variants are severities; the remaining ones are
/// layout-specific categories intended for filtering. `Info` is the default.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum LayoutDebugMessageType {
    /// Informational message (the default).
    #[default]
    Info,
    /// Warning message.
    Warning,
    /// Error message.
    Error,
    // Layout-specific categories for filtering
    /// Box-properties category.
    BoxProps,
    /// CSS getter category.
    CssGetter,
    /// Block Formatting Context layout
    BfcLayout,
    /// Inline Formatting Context layout
    IfcLayout,
    /// Table layout category.
    TableLayout,
    /// Display-type category.
    DisplayType,
    /// Position-calculation category.
    PositionCalculation,
}
89

            
90

            
91
/// A debug message emitted during layout, with severity, text, and source location.
92
#[derive(Debug, Default, Clone, PartialEq, PartialOrd)]
93
#[repr(C)]
94
pub struct LayoutDebugMessage {
95
    pub message_type: LayoutDebugMessageType,
96
    pub message: AzString,
97
    pub location: AzString,
98
}
99

            
100
impl LayoutDebugMessage {
101
    /// Create a new debug message with automatic caller location tracking
102
    #[track_caller]
103
4161622
    pub fn new(message_type: LayoutDebugMessageType, message: impl Into<String>) -> Self {
104
4161622
        let location = core::panic::Location::caller();
105
4161622
        Self {
106
4161622
            message_type,
107
4161622
            message: AzString::from_string(message.into()),
108
4161622
            location: AzString::from_string(format!(
109
4161622
                "{}:{}:{}",
110
4161622
                location.file(),
111
4161622
                location.line(),
112
4161622
                location.column()
113
4161622
            )),
114
4161622
        }
115
4161622
    }
116

            
117
    /// Helper for Info messages
118
    #[track_caller]
119
3451665
    pub fn info(message: impl Into<String>) -> Self {
120
3451665
        Self::new(LayoutDebugMessageType::Info, message)
121
3451665
    }
122

            
123
    /// Helper for Warning messages
124
    #[track_caller]
125
    pub fn warning(message: impl Into<String>) -> Self {
126
        Self::new(LayoutDebugMessageType::Warning, message)
127
    }
128

            
129
    /// Helper for Error messages
130
    #[track_caller]
131
    pub fn error(message: impl Into<String>) -> Self {
132
        Self::new(LayoutDebugMessageType::Error, message)
133
    }
134

            
135
    /// Helper for BoxProps debug messages
136
    #[track_caller]
137
118492
    pub fn box_props(message: impl Into<String>) -> Self {
138
118492
        Self::new(LayoutDebugMessageType::BoxProps, message)
139
118492
    }
140

            
141
    /// Helper for CSS Getter debug messages
142
    #[track_caller]
143
    pub fn css_getter(message: impl Into<String>) -> Self {
144
        Self::new(LayoutDebugMessageType::CssGetter, message)
145
    }
146

            
147
    /// Helper for BFC Layout debug messages
148
    #[track_caller]
149
    pub fn bfc_layout(message: impl Into<String>) -> Self {
150
        Self::new(LayoutDebugMessageType::BfcLayout, message)
151
    }
152

            
153
    /// Helper for IFC Layout debug messages
154
    #[track_caller]
155
465756
    pub fn ifc_layout(message: impl Into<String>) -> Self {
156
465756
        Self::new(LayoutDebugMessageType::IfcLayout, message)
157
465756
    }
158

            
159
    /// Helper for Table Layout debug messages
160
    #[track_caller]
161
57816
    pub fn table_layout(message: impl Into<String>) -> Self {
162
57816
        Self::new(LayoutDebugMessageType::TableLayout, message)
163
57816
    }
164

            
165
    /// Helper for Display Type debug messages
166
    #[track_caller]
167
    pub fn display_type(message: impl Into<String>) -> Self {
168
        Self::new(LayoutDebugMessageType::DisplayType, message)
169
    }
170
}
171

            
172
/// FFI-safe string type backed by [`U8Vec`] with destructor-based memory management.
///
/// Contents are guaranteed to be valid UTF-8 by all safe constructors.
/// Memory ownership is tracked via the inner `U8Vec`'s destructor field.
#[repr(C)]
pub struct AzString {
    /// Raw byte storage of the string. `as_str()` reinterprets these bytes
    /// as UTF-8 without re-validating them, so code that writes to this
    /// public field directly must keep the contents valid UTF-8.
    pub vec: U8Vec,
}
180

            
181
// `OptionString`: the `Option` wrapper for `AzString` produced by `impl_option!`.
// `copy = false` because `AzString` is not `Copy`.
impl_option!(
    AzString,
    OptionString,
    copy = false,
    [Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
187

            
188
// Contents of a default-constructed `AzString`: the empty string.
static DEFAULT_STR: &str = "";
189

            
190
impl Default for AzString {
191
4815
    fn default() -> Self {
192
4815
        DEFAULT_STR.into()
193
4815
    }
194
}
195

            
196
impl<'a> From<&'a str> for AzString {
197
4679161
    fn from(s: &'a str) -> Self {
198
4679161
        s.to_string().into()
199
4679161
    }
200
}
201

            
202
impl AsRef<str> for AzString {
203
    fn as_ref(&self) -> &str {
204
        self.as_str()
205
    }
206
}
207

            
208
impl core::fmt::Debug for AzString {
209
3975128
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
210
3975128
        self.as_str().fmt(f)
211
3975128
    }
212
}
213

            
214
impl core::fmt::Display for AzString {
215
93
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
216
93
        self.as_str().fmt(f)
217
93
    }
218
}
219

            
220
impl AzString {
221
    #[inline]
222
933771
    pub const fn from_const_str(s: &'static str) -> Self {
223
933771
        Self {
224
933771
            vec: U8Vec::from_const_slice(s.as_bytes()),
225
933771
        }
226
933771
    }
227

            
228
    /// Creates a new AzString from a null-terminated C string (const char*).
229
    /// This copies the string data into a new allocation.
230
    ///
231
    /// # Safety
232
    /// - `ptr` must be a valid pointer to a null-terminated UTF-8 string
233
    /// - The string must remain valid for the duration of this call
234
    ///
235
    /// Note: `ptr` is `*const i8` rather than `*const core::ffi::c_char`
236
    /// so the auto-generated FFI signature in `dll_api_internal.rs`
237
    /// (which uses a literal `i8`) matches on every target —
238
    /// `c_char` is `i8` on x86/ARM Apple/Windows/Linux but `u8` on
239
    /// Android, which would otherwise produce a `*const u8 vs *const i8`
240
    /// mismatch at codegen-call sites. We cast internally before
241
    /// handing the pointer to `CStr::from_ptr`.
242
    #[inline]
243
    pub unsafe fn from_c_str(ptr: *const i8) -> Self {
244
        if ptr.is_null() {
245
            return Self::default();
246
        }
247
        let c_str = core::ffi::CStr::from_ptr(ptr as *const core::ffi::c_char);
248
        let bytes = c_str.to_bytes();
249
        Self::copy_from_bytes(bytes.as_ptr(), 0, bytes.len())
250
    }
251

            
252
    /// Copies bytes from a pointer into a new AzString.
253
    /// This is useful for C FFI where you have a char* buffer.
254
    ///
255
    /// Invalid UTF-8 sequences are replaced with U+FFFD to maintain
256
    /// the UTF-8 invariant required by [`as_str()`](Self::as_str).
257
    ///
258
    /// `#[inline(always)]` (2026-06-03 web-lift FIX): forces inlining into the
259
    /// `extern "C" AzString_copyFromBytes` wrapper so there is NO separate
260
    /// C-ABI(X8-sret) → Rust-ABI(X0-sret) boundary call. The lift mis-threads
261
    /// %state across that sret-in-X0 shift (X1/ptr 0x13f80→garbage, X3/len 5→0,
262
    /// empty AzString); inlining lets the wrapper do the alloc/memcpy directly
263
    /// with the standard X8-sret ABI the cascade proves works.
264
    #[inline(always)]
265
    pub fn copy_from_bytes(ptr: *const u8, start: usize, len: usize) -> Self {
266
        let raw = U8Vec::copy_from_bytes(ptr, start, len);
267
        // web-lift FIX (2026-06-03): FAST PATH for already-valid UTF-8 (the common case, incl. all
268
        // ASCII like "Hello") — wrap the U8Vec directly, avoiding the `String::from_utf8_lossy()
269
        // .into_owned()` std sret-in-X0 call that the lift mis-threads (the returned String comes
270
        // back with len=0 → empty AzString). `core::str::from_utf8` returns a `Result<&str,_>` (a
271
        // slice, NOT a by-value struct) so it has no sret to mis-thread. Also a real perf win (no
272
        // 2nd alloc+copy for valid input). Slow path (rare, invalid UTF-8) keeps the lossy replace.
273
        if core::str::from_utf8(raw.as_ref()).is_ok() {
274
            return Self { vec: raw };
275
        }
276
        let s = String::from_utf8_lossy(raw.as_ref()).into_owned();
277
        Self::from_string(s)
278
    }
279

            
280
    #[inline(always)] // web-lift: inline through the sret-in-X0 chain (see copy_from_bytes)
281
15552558
    pub fn from_string(s: String) -> Self {
282
15552558
        Self {
283
15552558
            vec: U8Vec::from_vec(s.into_bytes()),
284
15552558
        }
285
15552558
    }
286

            
287
    #[inline]
288
5190523
    pub fn as_str(&self) -> &str {
289
5190523
        unsafe { core::str::from_utf8_unchecked(self.vec.as_ref()) }
290
5190523
    }
291

            
292
    /// NOTE: CLONES the memory if the memory is external or &'static
293
    /// Moves the memory out if the memory is library-allocated
294
    #[inline]
295
324550
    pub fn clone_self(&self) -> Self {
296
324550
        Self {
297
324550
            vec: self.vec.clone_self(),
298
324550
        }
299
324550
    }
300

            
301
    #[inline]
302
106071
    pub fn into_library_owned_string(self) -> String {
303
106071
        match self.vec.destructor {
304
            U8VecDestructor::NoDestructor | U8VecDestructor::External(_) | U8VecDestructor::AlreadyDestroyed => {
305
                self.as_str().to_string()
306
            }
307
            U8VecDestructor::DefaultRust => {
308
106071
                let m = core::mem::ManuallyDrop::new(self);
309
106071
                unsafe { String::from_raw_parts(m.vec.ptr as *mut u8, m.vec.len, m.vec.cap) }
310
            }
311
        }
312
106071
    }
313

            
314
    #[inline]
315
    pub fn as_bytes(&self) -> &[u8] {
316
        self.vec.as_ref()
317
    }
318

            
319
    #[inline]
320
    pub fn into_bytes(self) -> U8Vec {
321
        let m = core::mem::ManuallyDrop::new(self);
322
        U8Vec {
323
            ptr: m.vec.ptr,
324
            len: m.vec.len,
325
            cap: m.vec.cap,
326
            destructor: m.vec.destructor,
327
        }
328
    }
329

            
330
    /// Returns the length of the string in bytes (not including null terminator)
331
    #[inline]
332
1
    pub fn len(&self) -> usize {
333
1
        self.vec.len
334
1
    }
335

            
336
    /// Returns true if the string is empty
337
    #[inline]
338
    pub fn is_empty(&self) -> bool {
339
        self.vec.len == 0
340
    }
341

            
342
    /// Creates a null-terminated copy of the string for C FFI usage.
343
    /// Returns a new U8Vec that contains the string data followed by a null byte.
344
    /// The caller is responsible for freeing this memory.
345
    ///
346
    /// Use this when you need to pass a string to C code that expects `const char*`.
347
    #[inline]
348
    pub fn to_c_str(&self) -> U8Vec {
349
        let bytes = self.as_bytes();
350
        let mut result = Vec::with_capacity(bytes.len() + 1);
351
        result.extend_from_slice(bytes);
352
        result.push(0); // null terminator
353
        U8Vec::from_vec(result)
354
    }
355

            
356
    /// Shared implementation for UTF-16 decoding with a caller-supplied byte-order function.
357
    ///
358
    /// # Safety
359
    /// - `ptr` must be valid for reading `len` bytes
360
    /// - `len` must be even (UTF-16 uses 2 bytes per code unit)
361
    unsafe fn from_utf16_with_byte_order(
362
        ptr: *const u8,
363
        len: usize,
364
        from_bytes: fn([u8; 2]) -> u16,
365
    ) -> Self {
366
        if ptr.is_null() || len == 0 {
367
            return Self::default();
368
        }
369

            
370
        // UTF-16 requires pairs of bytes
371
        if !len.is_multiple_of(2) {
372
            return Self::default();
373
        }
374

            
375
        let byte_slice = core::slice::from_raw_parts(ptr, len);
376
        let code_units: Vec<u16> = byte_slice
377
            .chunks_exact(2)
378
            .map(|chunk| from_bytes([chunk[0], chunk[1]]))
379
            .collect();
380

            
381
        match String::from_utf16(&code_units) {
382
            Ok(s) => Self::from_string(s),
383
            Err(_) => Self::default(),
384
        }
385
    }
386

            
387
    /// Creates a new AzString from UTF-16 encoded bytes (little-endian).
388
    /// Returns an empty string if the input is invalid UTF-16 or has odd length.
389
    ///
390
    /// # Arguments
391
    /// * `ptr` - Pointer to UTF-16 encoded bytes
392
    /// * `len` - Length in bytes (not code units) - must be even
393
    ///
394
    /// # Safety
395
    /// - `ptr` must be valid for reading `len` bytes
396
    /// - `len` must be even (UTF-16 uses 2 bytes per code unit)
397
    #[inline]
398
    pub unsafe fn from_utf16_le(ptr: *const u8, len: usize) -> Self {
399
        Self::from_utf16_with_byte_order(ptr, len, u16::from_le_bytes)
400
    }
401

            
402
    /// Creates a new AzString from UTF-16 encoded bytes (big-endian).
403
    /// Returns an empty string if the input is invalid UTF-16 or has odd length.
404
    ///
405
    /// # Arguments
406
    /// * `ptr` - Pointer to UTF-16 encoded bytes
407
    /// * `len` - Length in bytes (not code units) - must be even
408
    ///
409
    /// # Safety
410
    /// - `ptr` must be valid for reading `len` bytes
411
    /// - `len` must be even (UTF-16 uses 2 bytes per code unit)
412
    #[inline]
413
    pub unsafe fn from_utf16_be(ptr: *const u8, len: usize) -> Self {
414
        Self::from_utf16_with_byte_order(ptr, len, u16::from_be_bytes)
415
    }
416

            
417
    /// Creates a new AzString from UTF-8 bytes with lossy conversion.
418
    /// Invalid UTF-8 sequences are replaced with the Unicode replacement character (U+FFFD).
419
    ///
420
    /// # Safety
421
    /// - `ptr` must be valid for reading `len` bytes
422
    #[inline]
423
    pub unsafe fn from_utf8_lossy(ptr: *const u8, len: usize) -> Self {
424
        if ptr.is_null() || len == 0 {
425
            return Self::default();
426
        }
427
        
428
        let byte_slice = core::slice::from_raw_parts(ptr, len);
429
        let s = String::from_utf8_lossy(byte_slice).into_owned();
430
        Self::from_string(s)
431
    }
432

            
433
    /// Creates a new AzString from UTF-8 bytes.
434
    /// Returns an empty string if the input is not valid UTF-8.
435
    ///
436
    /// # Safety
437
    /// - `ptr` must be valid for reading `len` bytes
438
    #[inline]
439
    pub unsafe fn from_utf8(ptr: *const u8, len: usize) -> Self {
440
        if ptr.is_null() || len == 0 {
441
            return Self::default();
442
        }
443
        
444
        let byte_slice = core::slice::from_raw_parts(ptr, len);
445
        match core::str::from_utf8(byte_slice) {
446
            Ok(s) => Self::from_string(s.to_string()),
447
            Err(_) => Self::default(),
448
        }
449
    }
450
}
451

            
452
/// Takes ownership of a `String`; equivalent to `AzString::from_string`.
impl From<String> for AzString {
    fn from(input: String) -> AzString {
        Self::from_string(input)
    }
}
457

            
458
impl PartialOrd for AzString {
459
    fn partial_cmp(&self, rhs: &Self) -> Option<core::cmp::Ordering> {
460
        self.as_str().partial_cmp(rhs.as_str())
461
    }
462
}
463

            
464
impl Ord for AzString {
465
    fn cmp(&self, rhs: &Self) -> core::cmp::Ordering {
466
        self.as_str().cmp(rhs.as_str())
467
    }
468
}
469

            
470
impl Clone for AzString {
471
324403
    fn clone(&self) -> Self {
472
324403
        self.clone_self()
473
324403
    }
474
}
475

            
476
impl PartialEq for AzString {
477
96
    fn eq(&self, rhs: &Self) -> bool {
478
96
        self.as_str().eq(rhs.as_str())
479
96
    }
480
}
481

            
482
// Marker impl: equality is `str` equality (see `PartialEq` above), which is
// a full equivalence relation.
impl Eq for AzString {}
483

            
484
impl core::hash::Hash for AzString {
485
128917
    fn hash<H>(&self, state: &mut H)
486
128917
    where
487
128917
        H: core::hash::Hasher,
488
    {
489
128917
        self.as_str().hash(state)
490
128917
    }
491
}
492

            
493
impl core::ops::Deref for AzString {
494
    type Target = str;
495

            
496
44340
    fn deref(&self) -> &str {
497
44340
        self.as_str()
498
44340
    }
499
}
500

            
501
// `OptionU8`: `Option` wrapper for `u8`; also passed to `impl_vec!` below.
impl_option!(
    u8,
    OptionU8,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);

// `U8Vec`: the FFI-safe byte vector that backs `AzString`, together with its
// destructor and slice companion types and the usual trait implementations.
impl_vec!(u8, U8Vec, U8VecDestructor, U8VecDestructorType, U8VecSlice, OptionU8);
impl_vec_mut!(u8, U8Vec);
impl_vec_debug!(u8, U8Vec);
impl_vec_partialord!(u8, U8Vec);
impl_vec_ord!(u8, U8Vec);
impl_vec_clone!(u8, U8Vec, U8VecDestructor);
impl_vec_partialeq!(u8, U8Vec);
impl_vec_eq!(u8, U8Vec);
impl_vec_hash!(u8, U8Vec);
516

            
517
impl U8Vec {
518
    /// Copies bytes from a pointer into a new Vec.
519
    /// This is useful for C FFI where you have a uint8_t* buffer.
520
    ///
521
    /// # Safety contract (caller must ensure)
522
    /// - `ptr` must be valid for reading `start + len` bytes
523
    /// - `start + len` must not overflow
524
    #[inline(always)] // web-lift: inline through the sret-in-X0 chain (see AzString::copy_from_bytes)
525
    pub fn copy_from_bytes(ptr: *const u8, start: usize, len: usize) -> Self {
526
        if ptr.is_null() || len == 0 {
527
            return Self::new();
528
        }
529
        debug_assert!(
530
            start.checked_add(len).is_some(),
531
            "U8Vec::copy_from_bytes: start + len overflows"
532
        );
533
        let slice = unsafe { core::slice::from_raw_parts(ptr.add(start), len) };
534
        Self::from_vec(slice.to_vec())
535
    }
536
}
537

            
538
// `OptionU8Vec`: `Option` wrapper for `U8Vec`. `copy = false` because the
// vector is not `Copy`.
impl_option!(
    U8Vec,
    OptionU8Vec,
    copy = false,
    [Debug, Clone, PartialEq, Ord, PartialOrd, Eq, Hash]
);
544

            
545
// `U16Vec`: FFI-safe vector of `u16`.
impl_vec!(u16, U16Vec, U16VecDestructor, U16VecDestructorType, U16VecSlice, OptionU16);
impl_vec_debug!(u16, U16Vec);
impl_vec_partialord!(u16, U16Vec);
impl_vec_ord!(u16, U16Vec);
impl_vec_clone!(u16, U16Vec, U16VecDestructor);
impl_vec_partialeq!(u16, U16Vec);
impl_vec_eq!(u16, U16Vec);
impl_vec_hash!(u16, U16Vec);

// `F32Vec`: FFI-safe vector of `f32`. No `Ord`/`Eq`/`Hash` impls are
// generated here, since `f32` does not implement those traits.
impl_vec!(f32, F32Vec, F32VecDestructor, F32VecDestructorType, F32VecSlice, OptionF32);
impl_vec_debug!(f32, F32Vec);
impl_vec_partialord!(f32, F32Vec);
impl_vec_clone!(f32, F32Vec, F32VecDestructor);
impl_vec_partialeq!(f32, F32Vec);

// Vec<char>
// `U32Vec`: FFI-safe vector of `u32`; per the note above it stands in for a
// vector of `char`.
impl_vec!(u32, U32Vec, U32VecDestructor, U32VecDestructorType, U32VecSlice, OptionU32);
impl_vec_mut!(u32, U32Vec);
impl_vec_debug!(u32, U32Vec);
impl_vec_partialord!(u32, U32Vec);
impl_vec_ord!(u32, U32Vec);
impl_vec_clone!(u32, U32Vec, U32VecDestructor);
impl_vec_partialeq!(u32, U32Vec);
impl_vec_eq!(u32, U32Vec);
impl_vec_hash!(u32, U32Vec);

// `StringVec`: FFI-safe vector of `AzString`.
impl_vec!(AzString, StringVec, StringVecDestructor, StringVecDestructorType, StringVecSlice, OptionString);
impl_vec_debug!(AzString, StringVec);
impl_vec_partialord!(AzString, StringVec);
impl_vec_ord!(AzString, StringVec);
impl_vec_clone!(AzString, StringVec, StringVecDestructor);
impl_vec_partialeq!(AzString, StringVec);
impl_vec_eq!(AzString, StringVec);
impl_vec_hash!(AzString, StringVec);
579

            
580
impl From<Vec<String>> for StringVec {
581
    fn from(v: Vec<String>) -> StringVec {
582
        let new_v: Vec<AzString> = v.into_iter().map(|s| s.into()).collect();
583
        new_v.into()
584
    }
585
}
586

            
587
// `OptionStringVec`: `Option` wrapper for `StringVec`. `copy = false`
// because the vector is not `Copy`.
impl_option!(
    StringVec,
    OptionStringVec,
    copy = false,
    [Debug, Clone, PartialOrd, PartialEq, Ord, Eq, Hash]
);

// `Option` wrappers for the primitive integer types and `bool`.
impl_option!(
    u16,
    OptionU16,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(
    u32,
    OptionU32,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(
    u64,
    OptionU64,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(
    usize,
    OptionUsize,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(
    i16,
    OptionI16,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(
    i32,
    OptionI32,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(bool, OptionBool, [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]);
// Floating-point wrappers: `Eq`, `Ord` and `Hash` are left out of the trait
// list because `f32`/`f64` do not implement them.
impl_option!(f32, OptionF32, [Debug, Copy, Clone, PartialEq, PartialOrd]);
impl_option!(f64, OptionF64, [Debug, Copy, Clone, PartialEq, PartialOrd]);
627

            
628
// Manual implementations for Hash and Ord on OptionF32 (since f32 doesn't implement these traits)
629
impl core::hash::Hash for OptionF32 {
630
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
631
        match self {
632
            OptionF32::None => 0u8.hash(state),
633
            OptionF32::Some(v) => {
634
                1u8.hash(state);
635
                v.to_bits().hash(state);
636
            }
637
        }
638
    }
639
}
640

            
641
// Marker impl required by the `Ord` impl for `OptionF32`.
// NOTE(review): presumably equality comes from a derived `PartialEq`, in
// which case `Some(NaN) != Some(NaN)` and `Eq`'s reflexivity does not hold
// for NaN payloads — confirm callers never rely on NaN as a key.
impl Eq for OptionF32 {}
642

            
643
impl Ord for OptionF32 {
644
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
645
        match (self, other) {
646
            (OptionF32::None, OptionF32::None) => core::cmp::Ordering::Equal,
647
            (OptionF32::None, OptionF32::Some(_)) => core::cmp::Ordering::Less,
648
            (OptionF32::Some(_), OptionF32::None) => core::cmp::Ordering::Greater,
649
            (OptionF32::Some(a), OptionF32::Some(b)) => {
650
                a.partial_cmp(b).unwrap_or(core::cmp::Ordering::Equal)
651
            }
652
        }
653
    }
654
}
655

            
656
// ============================================================================
// StringArena — bump allocator for AzString bytes
// ============================================================================
//
// Consolidates thousands of small AzString allocations (tag names,
// attribute values, text content) into a handful of 64 KiB chunks.
// Each arena-backed AzString uses `U8VecDestructor::External` and stashes
// a cloned `Arc<StringArenaInner>` pointer in the `cap` field — dropping
// the AzString decrements the refcount, and the backing bytes are freed
// only when the last reference goes away. This works across FFI without
// changing any public struct layout.
667

            
668
use alloc::sync::Arc;
669
use core::cell::UnsafeCell;
670

            
671
/// Shared interior of a [`StringArena`]. Refcounted via `Arc<Self>`;
/// never accessed through its `Arc` for mutation — only the owning
/// `StringArena` (with `&mut self`) mutates the chunks.
struct StringArenaInner {
    /// Pre-allocated byte chunks. Pointers into a chunk stay valid
    /// because we never push past `Vec::capacity()` — no reallocation.
    /// Moving a chunk within the outer `Vec` does not move its heap buffer.
    chunks: UnsafeCell<Vec<Vec<u8>>>,
    /// Remaining unused bytes in the last chunk; `0` when a fresh
    /// chunk is needed.
    current_remaining: UnsafeCell<usize>,
}
682

            
683
// SAFETY:
// - Mutation through `UnsafeCell` only happens via `&mut StringArena`,
//   which owns the sole external reference to `Arc<StringArenaInner>`
//   held in a `StringArena`. Other `Arc` references live inside AzString
//   destructors and never touch chunks — they only drop the Arc.
// - `Arc<T>` itself needs `T: Send + Sync` to cross threads; since the
//   destructor can run on any thread, we claim Send+Sync and rely on the
//   single-writer invariant for mutation safety.
unsafe impl Send for StringArenaInner {}
unsafe impl Sync for StringArenaInner {}
693

            
694
/// Bump allocator backing arena-owned `AzString` instances.
///
/// Every AzString returned by [`StringArena::intern`] holds a cloned
/// `Arc` to this arena; the backing bytes stay alive until the last
/// such AzString (and the arena handle itself) is dropped.
///
/// Intended use: create one arena per XML/HTML parse pass, intern all
/// tag names / attribute values / text content through it, then drop the
/// handle. The AzStrings embedded in the resulting `StyledDom` keep the
/// arena alive for as long as they need the bytes.
pub struct StringArena {
    /// Shared, refcounted chunk storage; each interned `AzString` holds
    /// its own clone of this `Arc`.
    inner: Arc<StringArenaInner>,
}
707

            
708
impl StringArena {
709
    /// Size of a freshly allocated chunk. Large enough that a typical
710
    /// DOM parse fits in 1-2 chunks, small enough to not over-allocate
711
    /// for small documents.
712
    pub const CHUNK_SIZE: usize = 64 * 1024;
713

            
714
5
    pub fn new() -> Self {
715
5
        Self {
716
5
            inner: Arc::new(StringArenaInner {
717
5
                chunks: UnsafeCell::new(Vec::new()),
718
5
                current_remaining: UnsafeCell::new(0),
719
5
            }),
720
5
        }
721
5
    }
722

            
723
    /// Returns `(chunk_count, total_bytes_used)` for metrics.
724
1
    pub fn metrics(&self) -> (usize, usize) {
725
        // Safety: metrics is read-only; the caller holds &self so no
726
        // concurrent mutation via &mut self is possible.
727
        unsafe {
728
1
            let chunks = &*self.inner.chunks.get();
729
1
            let total: usize = chunks.iter().map(|c| c.len()).sum();
730
1
            (chunks.len(), total)
731
        }
732
1
    }
733

            
734
    /// Intern `s` into the arena and return an AzString whose backing
735
    /// bytes live inside the arena. The returned AzString owns a cloned
736
    /// `Arc` reference; dropping it decrements the refcount, and the
737
    /// arena frees its chunks when the final reference is released.
738
106
    pub fn intern(&mut self, s: &str) -> AzString {
739
106
        let bytes = s.as_bytes();
740
106
        let len = bytes.len();
741

            
742
106
        let ptr: *const u8 = if len == 0 {
743
            // Empty strings don't need arena storage; a non-null dangling
744
            // pointer is fine because `len == 0` means nobody will deref.
745
1
            core::ptr::NonNull::<u8>::dangling().as_ptr()
746
        } else {
747
            // Safety: `&mut self` ⇒ exclusive access to inner chunks.
748
            unsafe {
749
105
                let chunks: &mut Vec<Vec<u8>> = &mut *self.inner.chunks.get();
750
105
                let remaining: &mut usize = &mut *self.inner.current_remaining.get();
751

            
752
                // Oversized strings get their own dedicated chunk so we
753
                // don't waste the tail of the current chunk.
754
105
                if len > Self::CHUNK_SIZE / 2 {
755
1
                    let mut v = Vec::with_capacity(len);
756
1
                    v.extend_from_slice(bytes);
757
1
                    let p = v.as_ptr();
758
1
                    chunks.push(v);
759
1
                    p
760
                } else {
761
104
                    if *remaining < len {
762
4
                        chunks.push(Vec::with_capacity(Self::CHUNK_SIZE));
763
4
                        *remaining = Self::CHUNK_SIZE;
764
100
                    }
765
                    // Safety: chunk was allocated with capacity ≥ len and
766
                    // `remaining` tracks unused capacity — no realloc.
767
104
                    let chunk = chunks.last_mut().unwrap();
768
104
                    let offset = chunk.len();
769
104
                    chunk.extend_from_slice(bytes);
770
104
                    *remaining -= len;
771
104
                    chunk.as_ptr().add(offset)
772
                }
773
            }
774
        };
775

            
776
        // Each AzString carries its own Arc reference count. Stash the
777
        // raw Arc pointer in `cap` so the External destructor can decrement.
778
106
        let arc_raw = Arc::into_raw(Arc::clone(&self.inner));
779

            
780
106
        AzString {
781
106
            vec: U8Vec {
782
106
                ptr,
783
106
                len,
784
106
                // NOTE: `cap` stores an Arc pointer, not a capacity. This
785
106
                // works because the `External` destructor path never calls
786
106
                // `Vec::from_raw_parts(ptr, len, cap)` — only `DefaultRust`
787
106
                // does that.
788
106
                cap: arc_raw as usize,
789
106
                destructor: U8VecDestructor::External(arena_string_destructor),
790
106
            },
791
106
        }
792
106
    }
793
}
794

            
795
impl Default for StringArena {
796
    fn default() -> Self {
797
        Self::new()
798
    }
799
}
800

            
801
/// Destructor installed on every arena-backed AzString. Reads the Arc
802
/// pointer out of `cap` and drops one Arc reference; when the count
803
/// reaches zero the `StringArenaInner` is freed.
804
106
extern "C" fn arena_string_destructor(vec: *mut U8Vec) {
805
    // Safety: called at most once per AzString drop. `cap` was set by
806
    // `StringArena::intern` to `Arc::into_raw(Arc<StringArenaInner>)`.
807
    unsafe {
808
106
        let v = &mut *vec;
809
106
        let arc_raw = v.cap as *const StringArenaInner;
810
106
        if !arc_raw.is_null() {
811
106
            let _ = Arc::from_raw(arc_raw);
812
106
            // Prevent a hypothetical double-drop from dereferencing
813
106
            // freed memory.
814
106
            v.cap = 0;
815
106
        }
816
    }
817
106
}
818

            
819
#[cfg(test)]
mod string_arena_tests {
    use super::*;

    /// Interned strings (including the empty one) read back unchanged.
    #[test]
    fn intern_round_trip() {
        let mut arena = StringArena::new();
        let inputs = ["hello", "world", ""];
        let interned: Vec<AzString> = inputs.iter().map(|text| arena.intern(text)).collect();
        assert_eq!(interned[0].as_str(), "hello");
        assert_eq!(interned[1].as_str(), "world");
        assert_eq!(interned[2].as_str(), "");
    }

    /// An interned string keeps its bytes alive after the arena handle is gone.
    #[test]
    fn strings_outlive_arena_handle() {
        let survivor = {
            let mut arena = StringArena::new();
            arena.intern("survives drop of arena handle")
        };
        assert_eq!(survivor.as_str(), "survives drop of arena handle");
    }

    /// A string of a full chunk's size is stored and read back intact.
    #[test]
    fn oversized_string_gets_dedicated_chunk() {
        let mut arena = StringArena::new();
        let big = "x".repeat(StringArena::CHUNK_SIZE);
        let interned = arena.intern(&big);
        assert_eq!(interned.len(), big.len());
        assert_eq!(interned.as_str(), big.as_str());
    }

    /// One hundred short strings need at most two chunks and stay readable.
    #[test]
    fn many_small_strings_share_chunk() {
        let mut arena = StringArena::new();
        let strings: Vec<AzString> = (0..100)
            .map(|i| arena.intern(&format!("s{i}")))
            .collect();
        let (chunks, _bytes) = arena.metrics();
        assert!(chunks <= 2, "expected ≤2 chunks for 100 small strings, got {chunks}");
        for (i, s) in strings.iter().enumerate() {
            assert_eq!(s.as_str(), format!("s{i}"));
        }
    }

    #[test]
    fn clone_deep_copies_and_is_independent() {
        // Cloning an External AzString deep-copies into DefaultRust, so
        // the clone doesn't depend on the arena at all.
        let clone = {
            let mut arena = StringArena::new();
            let original = arena.intern("deep-copy test");
            original.clone()
        };
        assert_eq!(clone.as_str(), "deep-copy test");
    }
}