1
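//! Core FFI-safe types used across crate boundaries.
//!
//! This module defines the fundamental types for FFI interop: [`AzString`] (an FFI-safe
//! string backed by [`U8Vec`] with destructor-based memory management), [`EmptyStruct`] (a
//! non-zero-size unit type), and various `Vec`/`Option` wrappers generated by the
//! `impl_vec!` and `impl_option!` macros. It also contains the layout debug-message types
//! ([`LayoutDebugMessage`], [`LayoutDebugMessageType`]) and [`StringArena`], a bump
//! allocator that backs many small [`AzString`] values with a few large chunks.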
7

            
8
use alloc::{
9
    string::{String, ToString},
10
    vec::Vec,
11
};
12

            
13
use crate::props::basic::ColorU;
14

            
15
// ============================================================================
16
// EmptyStruct type - FFI-safe replacement for ()
17
// ============================================================================
18

            
19
/// FFI-safe stand-in for the unit type `()`, mainly for use inside `Result`.
///
/// The unit type `()` is zero-sized, which makes it unsuitable for passing
/// across the C ABI. `EmptyStruct` carries a single reserved byte so that it
/// always has a non-zero size.
///
/// # Usage
/// Write `Result<EmptyStruct, Error>` where you would otherwise write
/// `Result<(), Error>`.
///
/// # Example
/// ```ignore
/// fn do_something() -> Result<EmptyStruct, MyError> {
///     // ... do work ...
///     Ok(EmptyStruct::default())
/// }
/// ```
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct EmptyStruct {
    /// Placeholder byte that gives the struct a non-zero size.
    /// The constructors in this file always set it to 0.
    pub _reserved: u8,
}
43

            
44

            
45
impl EmptyStruct {
46
    /// Create a new EmptyStruct value (equivalent to `()`)
47
    #[must_use]
48
    pub const fn new() -> Self {
49
        Self { _reserved: 0 }
50
    }
51
}
52

            
53
impl From<()> for EmptyStruct {
54
    fn from(_: ()) -> Self {
55
        Self::default()
56
    }
57
}
58

            
59
/// Lowers an `EmptyStruct` back to the plain unit value.
impl From<EmptyStruct> for () {
    fn from(_value: EmptyStruct) -> Self {
        // Nothing to carry over: the reserved byte holds no information.
    }
}
64

            
65
// ============================================================================
66
// Debug message types
67
// ============================================================================
68

            
69
/// Severity or category tag attached to a layout debug message.
///
/// The first three variants are severities; the remaining ones are
/// layout-specific categories that can be used for filtering.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum LayoutDebugMessageType {
    /// Informational message; this is the default variant.
    #[default]
    Info,
    Warning,
    Error,
    // Layout-specific categories for filtering
    BoxProps,
    CssGetter,
    /// Block Formatting Context layout
    BfcLayout,
    /// Inline Formatting Context layout
    IfcLayout,
    TableLayout,
    DisplayType,
    PositionCalculation,
}
89

            
90

            
91
/// A debug message emitted during layout, with severity, text, and source location.
92
#[derive(Debug, Default, Clone, PartialEq, PartialOrd)]
93
#[repr(C)]
94
pub struct LayoutDebugMessage {
95
    pub message_type: LayoutDebugMessageType,
96
    pub message: AzString,
97
    pub location: AzString,
98
}
99

            
100
impl LayoutDebugMessage {
101
    /// Create a new debug message with automatic caller location tracking
102
    #[track_caller]
103
1048835
    pub fn new(message_type: LayoutDebugMessageType, message: impl Into<String>) -> Self {
104
1048835
        let location = core::panic::Location::caller();
105
1048835
        Self {
106
1048835
            message_type,
107
1048835
            message: AzString::from_string(message.into()),
108
1048835
            location: AzString::from_string(format!(
109
1048835
                "{}:{}:{}",
110
1048835
                location.file(),
111
1048835
                location.line(),
112
1048835
                location.column()
113
1048835
            )),
114
1048835
        }
115
1048835
    }
116

            
117
    /// Helper for Info messages
118
    #[track_caller]
119
806163
    pub fn info(message: impl Into<String>) -> Self {
120
806163
        Self::new(LayoutDebugMessageType::Info, message)
121
806163
    }
122

            
123
    /// Helper for Warning messages
124
    #[track_caller]
125
    pub fn warning(message: impl Into<String>) -> Self {
126
        Self::new(LayoutDebugMessageType::Warning, message)
127
    }
128

            
129
    /// Helper for Error messages
130
    #[track_caller]
131
    pub fn error(message: impl Into<String>) -> Self {
132
        Self::new(LayoutDebugMessageType::Error, message)
133
    }
134

            
135
    /// Helper for BoxProps debug messages
136
    #[track_caller]
137
54495
    pub fn box_props(message: impl Into<String>) -> Self {
138
54495
        Self::new(LayoutDebugMessageType::BoxProps, message)
139
54495
    }
140

            
141
    /// Helper for CSS Getter debug messages
142
    #[track_caller]
143
    pub fn css_getter(message: impl Into<String>) -> Self {
144
        Self::new(LayoutDebugMessageType::CssGetter, message)
145
    }
146

            
147
    /// Helper for BFC Layout debug messages
148
    #[track_caller]
149
    pub fn bfc_layout(message: impl Into<String>) -> Self {
150
        Self::new(LayoutDebugMessageType::BfcLayout, message)
151
    }
152

            
153
    /// Helper for IFC Layout debug messages
154
    #[track_caller]
155
105191
    pub fn ifc_layout(message: impl Into<String>) -> Self {
156
105191
        Self::new(LayoutDebugMessageType::IfcLayout, message)
157
105191
    }
158

            
159
    /// Helper for Table Layout debug messages
160
    #[track_caller]
161
45990
    pub fn table_layout(message: impl Into<String>) -> Self {
162
45990
        Self::new(LayoutDebugMessageType::TableLayout, message)
163
45990
    }
164

            
165
    /// Helper for Display Type debug messages
166
    #[track_caller]
167
    pub fn display_type(message: impl Into<String>) -> Self {
168
        Self::new(LayoutDebugMessageType::DisplayType, message)
169
    }
170
}
171

            
172
/// FFI-safe string type backed by [`U8Vec`] with destructor-based memory management.
173
///
174
/// Contents are guaranteed to be valid UTF-8 by all safe constructors.
175
/// Memory ownership is tracked via the inner `U8Vec`'s destructor field.
176
#[repr(C)]
177
pub struct AzString {
178
    pub vec: U8Vec,
179
}
180

            
181
// FFI-safe `Option<AzString>`; `copy = false` because `AzString` is not `Copy`.
impl_option!(AzString, OptionString, copy = false, [Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]);
187

            
188
// Empty string literal that `AzString::default()` converts into an `AzString`.
static DEFAULT_STR: &str = "";
189

            
190
impl Default for AzString {
191
3925
    fn default() -> Self {
192
3925
        DEFAULT_STR.into()
193
3925
    }
194
}
195

            
196
impl<'a> From<&'a str> for AzString {
197
3736478
    fn from(s: &'a str) -> Self {
198
3736478
        s.to_string().into()
199
3736478
    }
200
}
201

            
202
impl AsRef<str> for AzString {
203
    fn as_ref(&self) -> &str {
204
        self.as_str()
205
    }
206
}
207

            
208
impl core::fmt::Debug for AzString {
209
17213
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
210
17213
        self.as_str().fmt(f)
211
17213
    }
212
}
213

            
214
impl core::fmt::Display for AzString {
215
84
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
216
84
        self.as_str().fmt(f)
217
84
    }
218
}
219

            
220
impl AzString {
221
    #[inline]
222
730913
    pub const fn from_const_str(s: &'static str) -> Self {
223
730913
        Self {
224
730913
            vec: U8Vec::from_const_slice(s.as_bytes()),
225
730913
        }
226
730913
    }
227

            
228
    /// Creates a new AzString from a null-terminated C string (const char*).
229
    /// This copies the string data into a new allocation.
230
    ///
231
    /// # Safety
232
    /// - `ptr` must be a valid pointer to a null-terminated UTF-8 string
233
    /// - The string must remain valid for the duration of this call
234
    ///
235
    /// Note: `ptr` is `*const i8` rather than `*const core::ffi::c_char`
236
    /// so the auto-generated FFI signature in `dll_api_internal.rs`
237
    /// (which uses a literal `i8`) matches on every target —
238
    /// `c_char` is `i8` on x86/ARM Apple/Windows/Linux but `u8` on
239
    /// Android, which would otherwise produce a `*const u8 vs *const i8`
240
    /// mismatch at codegen-call sites. We cast internally before
241
    /// handing the pointer to `CStr::from_ptr`.
242
    #[inline]
243
    pub unsafe fn from_c_str(ptr: *const i8) -> Self {
244
        if ptr.is_null() {
245
            return Self::default();
246
        }
247
        let c_str = core::ffi::CStr::from_ptr(ptr as *const core::ffi::c_char);
248
        let bytes = c_str.to_bytes();
249
        Self::copy_from_bytes(bytes.as_ptr(), 0, bytes.len())
250
    }
251

            
252
    /// Copies bytes from a pointer into a new AzString.
253
    /// This is useful for C FFI where you have a char* buffer.
254
    ///
255
    /// Invalid UTF-8 sequences are replaced with U+FFFD to maintain
256
    /// the UTF-8 invariant required by [`as_str()`](Self::as_str).
257
    #[inline]
258
    pub fn copy_from_bytes(ptr: *const u8, start: usize, len: usize) -> Self {
259
        let raw = U8Vec::copy_from_bytes(ptr, start, len);
260
        let s = String::from_utf8_lossy(raw.as_ref()).into_owned();
261
        Self::from_string(s)
262
    }
263

            
264
    #[inline]
265
7088503
    pub fn from_string(s: String) -> Self {
266
7088503
        Self {
267
7088503
            vec: U8Vec::from_vec(s.into_bytes()),
268
7088503
        }
269
7088503
    }
270

            
271
    #[inline]
272
825123
    pub fn as_str(&self) -> &str {
273
825123
        unsafe { core::str::from_utf8_unchecked(self.vec.as_ref()) }
274
825123
    }
275

            
276
    /// NOTE: CLONES the memory if the memory is external or &'static
277
    /// Moves the memory out if the memory is library-allocated
278
    #[inline]
279
150384
    pub fn clone_self(&self) -> Self {
280
150384
        Self {
281
150384
            vec: self.vec.clone_self(),
282
150384
        }
283
150384
    }
284

            
285
    #[inline]
286
43970
    pub fn into_library_owned_string(self) -> String {
287
43970
        match self.vec.destructor {
288
            U8VecDestructor::NoDestructor | U8VecDestructor::External(_) | U8VecDestructor::AlreadyDestroyed => {
289
                self.as_str().to_string()
290
            }
291
            U8VecDestructor::DefaultRust => {
292
43970
                let m = core::mem::ManuallyDrop::new(self);
293
43970
                unsafe { String::from_raw_parts(m.vec.ptr as *mut u8, m.vec.len, m.vec.cap) }
294
            }
295
        }
296
43970
    }
297

            
298
    #[inline]
299
    pub fn as_bytes(&self) -> &[u8] {
300
        self.vec.as_ref()
301
    }
302

            
303
    #[inline]
304
    pub fn into_bytes(self) -> U8Vec {
305
        let m = core::mem::ManuallyDrop::new(self);
306
        U8Vec {
307
            ptr: m.vec.ptr,
308
            len: m.vec.len,
309
            cap: m.vec.cap,
310
            destructor: m.vec.destructor,
311
        }
312
    }
313

            
314
    /// Returns the length of the string in bytes (not including null terminator)
315
    #[inline]
316
1
    pub fn len(&self) -> usize {
317
1
        self.vec.len
318
1
    }
319

            
320
    /// Returns true if the string is empty
321
    #[inline]
322
    pub fn is_empty(&self) -> bool {
323
        self.vec.len == 0
324
    }
325

            
326
    /// Creates a null-terminated copy of the string for C FFI usage.
327
    /// Returns a new U8Vec that contains the string data followed by a null byte.
328
    /// The caller is responsible for freeing this memory.
329
    ///
330
    /// Use this when you need to pass a string to C code that expects `const char*`.
331
    #[inline]
332
    pub fn to_c_str(&self) -> U8Vec {
333
        let bytes = self.as_bytes();
334
        let mut result = Vec::with_capacity(bytes.len() + 1);
335
        result.extend_from_slice(bytes);
336
        result.push(0); // null terminator
337
        U8Vec::from_vec(result)
338
    }
339

            
340
    /// Shared implementation for UTF-16 decoding with a caller-supplied byte-order function.
341
    ///
342
    /// # Safety
343
    /// - `ptr` must be valid for reading `len` bytes
344
    /// - `len` must be even (UTF-16 uses 2 bytes per code unit)
345
    unsafe fn from_utf16_with_byte_order(
346
        ptr: *const u8,
347
        len: usize,
348
        from_bytes: fn([u8; 2]) -> u16,
349
    ) -> Self {
350
        if ptr.is_null() || len == 0 {
351
            return Self::default();
352
        }
353

            
354
        // UTF-16 requires pairs of bytes
355
        if !len.is_multiple_of(2) {
356
            return Self::default();
357
        }
358

            
359
        let byte_slice = core::slice::from_raw_parts(ptr, len);
360
        let code_units: Vec<u16> = byte_slice
361
            .chunks_exact(2)
362
            .map(|chunk| from_bytes([chunk[0], chunk[1]]))
363
            .collect();
364

            
365
        match String::from_utf16(&code_units) {
366
            Ok(s) => Self::from_string(s),
367
            Err(_) => Self::default(),
368
        }
369
    }
370

            
371
    /// Creates a new AzString from UTF-16 encoded bytes (little-endian).
372
    /// Returns an empty string if the input is invalid UTF-16 or has odd length.
373
    ///
374
    /// # Arguments
375
    /// * `ptr` - Pointer to UTF-16 encoded bytes
376
    /// * `len` - Length in bytes (not code units) - must be even
377
    ///
378
    /// # Safety
379
    /// - `ptr` must be valid for reading `len` bytes
380
    /// - `len` must be even (UTF-16 uses 2 bytes per code unit)
381
    #[inline]
382
    pub unsafe fn from_utf16_le(ptr: *const u8, len: usize) -> Self {
383
        Self::from_utf16_with_byte_order(ptr, len, u16::from_le_bytes)
384
    }
385

            
386
    /// Creates a new AzString from UTF-16 encoded bytes (big-endian).
387
    /// Returns an empty string if the input is invalid UTF-16 or has odd length.
388
    ///
389
    /// # Arguments
390
    /// * `ptr` - Pointer to UTF-16 encoded bytes
391
    /// * `len` - Length in bytes (not code units) - must be even
392
    ///
393
    /// # Safety
394
    /// - `ptr` must be valid for reading `len` bytes
395
    /// - `len` must be even (UTF-16 uses 2 bytes per code unit)
396
    #[inline]
397
    pub unsafe fn from_utf16_be(ptr: *const u8, len: usize) -> Self {
398
        Self::from_utf16_with_byte_order(ptr, len, u16::from_be_bytes)
399
    }
400

            
401
    /// Creates a new AzString from UTF-8 bytes with lossy conversion.
402
    /// Invalid UTF-8 sequences are replaced with the Unicode replacement character (U+FFFD).
403
    ///
404
    /// # Safety
405
    /// - `ptr` must be valid for reading `len` bytes
406
    #[inline]
407
    pub unsafe fn from_utf8_lossy(ptr: *const u8, len: usize) -> Self {
408
        if ptr.is_null() || len == 0 {
409
            return Self::default();
410
        }
411
        
412
        let byte_slice = core::slice::from_raw_parts(ptr, len);
413
        let s = String::from_utf8_lossy(byte_slice).into_owned();
414
        Self::from_string(s)
415
    }
416

            
417
    /// Creates a new AzString from UTF-8 bytes.
418
    /// Returns an empty string if the input is not valid UTF-8.
419
    ///
420
    /// # Safety
421
    /// - `ptr` must be valid for reading `len` bytes
422
    #[inline]
423
    pub unsafe fn from_utf8(ptr: *const u8, len: usize) -> Self {
424
        if ptr.is_null() || len == 0 {
425
            return Self::default();
426
        }
427
        
428
        let byte_slice = core::slice::from_raw_parts(ptr, len);
429
        match core::str::from_utf8(byte_slice) {
430
            Ok(s) => Self::from_string(s.to_string()),
431
            Err(_) => Self::default(),
432
        }
433
    }
434
}
435

            
436
impl From<String> for AzString {
437
3952357
    fn from(input: String) -> AzString {
438
3952357
        AzString::from_string(input)
439
3952357
    }
440
}
441

            
442
impl PartialOrd for AzString {
443
    fn partial_cmp(&self, rhs: &Self) -> Option<core::cmp::Ordering> {
444
        self.as_str().partial_cmp(rhs.as_str())
445
    }
446
}
447

            
448
impl Ord for AzString {
449
    fn cmp(&self, rhs: &Self) -> core::cmp::Ordering {
450
        self.as_str().cmp(rhs.as_str())
451
    }
452
}
453

            
454
impl Clone for AzString {
455
144122
    fn clone(&self) -> Self {
456
144122
        self.clone_self()
457
144122
    }
458
}
459

            
460
impl PartialEq for AzString {
461
88
    fn eq(&self, rhs: &Self) -> bool {
462
88
        self.as_str().eq(rhs.as_str())
463
88
    }
464
}
465

            
466
// Marker impl: `PartialEq` above compares the `&str` contents, which is a full equivalence relation.
impl Eq for AzString {}
467

            
468
impl core::hash::Hash for AzString {
469
81215
    fn hash<H>(&self, state: &mut H)
470
81215
    where
471
81215
        H: core::hash::Hasher,
472
    {
473
81215
        self.as_str().hash(state)
474
81215
    }
475
}
476

            
477
impl core::ops::Deref for AzString {
478
    type Target = str;
479

            
480
35551
    fn deref(&self) -> &str {
481
35551
        self.as_str()
482
35551
    }
483
}
484

            
485
impl_option!(
486
    u8,
487
    OptionU8,
488
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
489
);
490

            
491
impl_vec!(u8, U8Vec, U8VecDestructor, U8VecDestructorType, U8VecSlice, OptionU8);
492
impl_vec_mut!(u8, U8Vec);
493
impl_vec_debug!(u8, U8Vec);
494
impl_vec_partialord!(u8, U8Vec);
495
impl_vec_ord!(u8, U8Vec);
496
impl_vec_clone!(u8, U8Vec, U8VecDestructor);
497
impl_vec_partialeq!(u8, U8Vec);
498
impl_vec_eq!(u8, U8Vec);
499
impl_vec_hash!(u8, U8Vec);
500

            
501
impl U8Vec {
502
    /// Copies bytes from a pointer into a new Vec.
503
    /// This is useful for C FFI where you have a uint8_t* buffer.
504
    ///
505
    /// # Safety contract (caller must ensure)
506
    /// - `ptr` must be valid for reading `start + len` bytes
507
    /// - `start + len` must not overflow
508
    #[inline]
509
    pub fn copy_from_bytes(ptr: *const u8, start: usize, len: usize) -> Self {
510
        if ptr.is_null() || len == 0 {
511
            return Self::new();
512
        }
513
        debug_assert!(
514
            start.checked_add(len).is_some(),
515
            "U8Vec::copy_from_bytes: start + len overflows"
516
        );
517
        let slice = unsafe { core::slice::from_raw_parts(ptr.add(start), len) };
518
        Self::from_vec(slice.to_vec())
519
    }
520
}
521

            
522
impl_option!(
523
    U8Vec,
524
    OptionU8Vec,
525
    copy = false,
526
    [Debug, Clone, PartialEq, Ord, PartialOrd, Eq, Hash]
527
);
528

            
529
impl_vec!(u16, U16Vec, U16VecDestructor, U16VecDestructorType, U16VecSlice, OptionU16);
530
impl_vec_debug!(u16, U16Vec);
531
impl_vec_partialord!(u16, U16Vec);
532
impl_vec_ord!(u16, U16Vec);
533
impl_vec_clone!(u16, U16Vec, U16VecDestructor);
534
impl_vec_partialeq!(u16, U16Vec);
535
impl_vec_eq!(u16, U16Vec);
536
impl_vec_hash!(u16, U16Vec);
537

            
538
impl_vec!(f32, F32Vec, F32VecDestructor, F32VecDestructorType, F32VecSlice, OptionF32);
539
impl_vec_debug!(f32, F32Vec);
540
impl_vec_partialord!(f32, F32Vec);
541
impl_vec_clone!(f32, F32Vec, F32VecDestructor);
542
impl_vec_partialeq!(f32, F32Vec);
543

            
544
// Vec<char>
545
impl_vec!(u32, U32Vec, U32VecDestructor, U32VecDestructorType, U32VecSlice, OptionU32);
546
impl_vec_mut!(u32, U32Vec);
547
impl_vec_debug!(u32, U32Vec);
548
impl_vec_partialord!(u32, U32Vec);
549
impl_vec_ord!(u32, U32Vec);
550
impl_vec_clone!(u32, U32Vec, U32VecDestructor);
551
impl_vec_partialeq!(u32, U32Vec);
552
impl_vec_eq!(u32, U32Vec);
553
impl_vec_hash!(u32, U32Vec);
554

            
555
impl_vec!(AzString, StringVec, StringVecDestructor, StringVecDestructorType, StringVecSlice, OptionString);
556
impl_vec_debug!(AzString, StringVec);
557
impl_vec_partialord!(AzString, StringVec);
558
impl_vec_ord!(AzString, StringVec);
559
impl_vec_clone!(AzString, StringVec, StringVecDestructor);
560
impl_vec_partialeq!(AzString, StringVec);
561
impl_vec_eq!(AzString, StringVec);
562
impl_vec_hash!(AzString, StringVec);
563

            
564
impl From<Vec<String>> for StringVec {
565
    fn from(v: Vec<String>) -> StringVec {
566
        let new_v: Vec<AzString> = v.into_iter().map(|s| s.into()).collect();
567
        new_v.into()
568
    }
569
}
570

            
571
impl_option!(
572
    StringVec,
573
    OptionStringVec,
574
    copy = false,
575
    [Debug, Clone, PartialOrd, PartialEq, Ord, Eq, Hash]
576
);
577

            
578
impl_option!(
579
    u16,
580
    OptionU16,
581
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
582
);
583
impl_option!(
584
    u32,
585
    OptionU32,
586
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
587
);
588
impl_option!(
589
    u64,
590
    OptionU64,
591
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
592
);
593
impl_option!(
594
    usize,
595
    OptionUsize,
596
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
597
);
598
impl_option!(
599
    i16,
600
    OptionI16,
601
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
602
);
603
impl_option!(
604
    i32,
605
    OptionI32,
606
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
607
);
608
impl_option!(bool, OptionBool, [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]);
609
impl_option!(f32, OptionF32, [Debug, Copy, Clone, PartialEq, PartialOrd]);
610
impl_option!(f64, OptionF64, [Debug, Copy, Clone, PartialEq, PartialOrd]);
611

            
612
// Manual implementations for Hash and Ord on OptionF32 (since f32 doesn't implement these traits)
613
impl core::hash::Hash for OptionF32 {
614
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
615
        match self {
616
            OptionF32::None => 0u8.hash(state),
617
            OptionF32::Some(v) => {
618
                1u8.hash(state);
619
                v.to_bits().hash(state);
620
            }
621
        }
622
    }
623
}
624

            
625
// NOTE(review): `Eq` promises reflexivity, but the `PartialEq` requested from `impl_option!`
// presumably compares the inner `f32`, so `Some(NaN) != Some(NaN)` — confirm NaN is never stored.
impl Eq for OptionF32 {}
626

            
627
impl Ord for OptionF32 {
628
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
629
        match (self, other) {
630
            (OptionF32::None, OptionF32::None) => core::cmp::Ordering::Equal,
631
            (OptionF32::None, OptionF32::Some(_)) => core::cmp::Ordering::Less,
632
            (OptionF32::Some(_), OptionF32::None) => core::cmp::Ordering::Greater,
633
            (OptionF32::Some(a), OptionF32::Some(b)) => {
634
                a.partial_cmp(b).unwrap_or(core::cmp::Ordering::Equal)
635
            }
636
        }
637
    }
638
}
639

            
640
// ============================================================================
641
// StringArena — bump allocator for AzString bytes
642
// ============================================================================
643
//
644
// Consolidates thousands of small AzString allocations (tag names,
645
// attribute values, text content) into a handful of 64 KiB chunks.
646
// Each arena-backed AzString uses `U8VecDestructor::External` and stashes
647
// a cloned `Arc<StringArenaInner>` pointer in the `cap` field — dropping
648
// the AzString decrements the refcount, and the backing bytes are freed
649
// only when the last reference goes away. This works across FFI without
650
// changing any public struct layout.
651

            
652
use alloc::sync::Arc;
653
use core::cell::UnsafeCell;
654

            
655
/// State shared between a [`StringArena`] handle and every `AzString` it
/// has handed out, kept alive through `Arc<Self>`.
///
/// Mutation only ever happens through the owning `StringArena` (which
/// requires `&mut self`); the other `Arc` holders never touch the fields.
struct StringArenaInner {
    /// Backing byte buffers. Pointers handed out into a buffer stay valid
    /// as long as that buffer is never grown past its `Vec::capacity()`,
    /// because then it is never reallocated.
    chunks: UnsafeCell<Vec<Vec<u8>>>,
    /// Unused bytes left in the last chunk; `0` means the next allocation
    /// needs a fresh chunk.
    current_remaining: UnsafeCell<usize>,
}
666

            
667
// Safety:
668
// - Mutation through `UnsafeCell` only happens via `&mut StringArena`,
669
//   which owns the sole external reference to `Arc<StringArenaInner>`
670
//   held in a `StringArena`. Other `Arc` references live inside AzString
671
//   destructors and never touch chunks — they only drop the Arc.
672
// - `Arc<T>` itself needs `T: Send + Sync` to cross threads; since the
673
//   destructor can run on any thread, we claim Send+Sync and rely on the
674
//   single-writer invariant for mutation safety.
675
unsafe impl Send for StringArenaInner {}
676
unsafe impl Sync for StringArenaInner {}
677

            
678
/// Bump allocator backing arena-owned `AzString` instances.
679
///
680
/// Every AzString returned by [`StringArena::intern`] holds a cloned
681
/// `Arc` to this arena; the backing bytes stay alive until the last
682
/// such AzString (and the arena handle itself) is dropped.
683
///
684
/// Intended use: create one arena per XML/HTML parse pass, intern all
685
/// tag names / attribute values / text content through it, then drop the
686
/// handle. The AzStrings embedded in the resulting `StyledDom` keep the
687
/// arena alive for as long as they need the bytes.
688
pub struct StringArena {
689
    inner: Arc<StringArenaInner>,
690
}
691

            
692
impl StringArena {
693
    /// Size of a freshly allocated chunk. Large enough that a typical
694
    /// DOM parse fits in 1-2 chunks, small enough to not over-allocate
695
    /// for small documents.
696
    pub const CHUNK_SIZE: usize = 64 * 1024;
697

            
698
5
    pub fn new() -> Self {
699
5
        Self {
700
5
            inner: Arc::new(StringArenaInner {
701
5
                chunks: UnsafeCell::new(Vec::new()),
702
5
                current_remaining: UnsafeCell::new(0),
703
5
            }),
704
5
        }
705
5
    }
706

            
707
    /// Returns `(chunk_count, total_bytes_used)` for metrics.
708
1
    pub fn metrics(&self) -> (usize, usize) {
709
        // Safety: metrics is read-only; the caller holds &self so no
710
        // concurrent mutation via &mut self is possible.
711
        unsafe {
712
1
            let chunks = &*self.inner.chunks.get();
713
1
            let total: usize = chunks.iter().map(|c| c.len()).sum();
714
1
            (chunks.len(), total)
715
        }
716
1
    }
717

            
718
    /// Intern `s` into the arena and return an AzString whose backing
719
    /// bytes live inside the arena. The returned AzString owns a cloned
720
    /// `Arc` reference; dropping it decrements the refcount, and the
721
    /// arena frees its chunks when the final reference is released.
722
106
    pub fn intern(&mut self, s: &str) -> AzString {
723
106
        let bytes = s.as_bytes();
724
106
        let len = bytes.len();
725

            
726
106
        let ptr: *const u8 = if len == 0 {
727
            // Empty strings don't need arena storage; a non-null dangling
728
            // pointer is fine because `len == 0` means nobody will deref.
729
1
            core::ptr::NonNull::<u8>::dangling().as_ptr()
730
        } else {
731
            // Safety: `&mut self` ⇒ exclusive access to inner chunks.
732
            unsafe {
733
105
                let chunks: &mut Vec<Vec<u8>> = &mut *self.inner.chunks.get();
734
105
                let remaining: &mut usize = &mut *self.inner.current_remaining.get();
735

            
736
                // Oversized strings get their own dedicated chunk so we
737
                // don't waste the tail of the current chunk.
738
105
                if len > Self::CHUNK_SIZE / 2 {
739
1
                    let mut v = Vec::with_capacity(len);
740
1
                    v.extend_from_slice(bytes);
741
1
                    let p = v.as_ptr();
742
1
                    chunks.push(v);
743
1
                    p
744
                } else {
745
104
                    if *remaining < len {
746
4
                        chunks.push(Vec::with_capacity(Self::CHUNK_SIZE));
747
4
                        *remaining = Self::CHUNK_SIZE;
748
100
                    }
749
                    // Safety: chunk was allocated with capacity ≥ len and
750
                    // `remaining` tracks unused capacity — no realloc.
751
104
                    let chunk = chunks.last_mut().unwrap();
752
104
                    let offset = chunk.len();
753
104
                    chunk.extend_from_slice(bytes);
754
104
                    *remaining -= len;
755
104
                    chunk.as_ptr().add(offset)
756
                }
757
            }
758
        };
759

            
760
        // Each AzString carries its own Arc reference count. Stash the
761
        // raw Arc pointer in `cap` so the External destructor can decrement.
762
106
        let arc_raw = Arc::into_raw(Arc::clone(&self.inner));
763

            
764
106
        AzString {
765
106
            vec: U8Vec {
766
106
                ptr,
767
106
                len,
768
106
                // NOTE: `cap` stores an Arc pointer, not a capacity. This
769
106
                // works because the `External` destructor path never calls
770
106
                // `Vec::from_raw_parts(ptr, len, cap)` — only `DefaultRust`
771
106
                // does that.
772
106
                cap: arc_raw as usize,
773
106
                destructor: U8VecDestructor::External(arena_string_destructor),
774
106
            },
775
106
        }
776
106
    }
777
}
778

            
779
impl Default for StringArena {
780
    fn default() -> Self {
781
        Self::new()
782
    }
783
}
784

            
785
/// Destructor installed on every arena-backed AzString. Reads the Arc
786
/// pointer out of `cap` and drops one Arc reference; when the count
787
/// reaches zero the `StringArenaInner` is freed.
788
106
extern "C" fn arena_string_destructor(vec: *mut U8Vec) {
789
    // Safety: called at most once per AzString drop. `cap` was set by
790
    // `StringArena::intern` to `Arc::into_raw(Arc<StringArenaInner>)`.
791
    unsafe {
792
106
        let v = &mut *vec;
793
106
        let arc_raw = v.cap as *const StringArenaInner;
794
106
        if !arc_raw.is_null() {
795
106
            let _ = Arc::from_raw(arc_raw);
796
106
            // Prevent a hypothetical double-drop from dereferencing
797
106
            // freed memory.
798
106
            v.cap = 0;
799
106
        }
800
    }
801
106
}
802

            
803
#[cfg(test)]
mod string_arena_tests {
    use super::*;

    /// Interned strings read back with the same contents, including "".
    #[test]
    fn intern_round_trip() {
        let mut arena = StringArena::new();
        let hello = arena.intern("hello");
        let world = arena.intern("world");
        let empty = arena.intern("");
        let cases = [(&hello, "hello"), (&world, "world"), (&empty, "")];
        for (interned, expected) in cases.iter() {
            assert_eq!(interned.as_str(), *expected);
        }
    }

    /// An interned string stays readable after the arena handle is gone.
    #[test]
    fn strings_outlive_arena_handle() {
        let mut arena = StringArena::new();
        let survivor = arena.intern("survives drop of arena handle");
        drop(arena);
        assert_eq!(survivor.as_str(), "survives drop of arena handle");
    }

    /// A string as large as a whole chunk is stored intact.
    #[test]
    fn oversized_string_gets_dedicated_chunk() {
        let mut arena = StringArena::new();
        let payload = "x".repeat(StringArena::CHUNK_SIZE);
        let interned = arena.intern(&payload);
        assert_eq!(interned.len(), payload.len());
        assert_eq!(interned.as_str(), payload.as_str());
    }

    /// 100 short strings fit into at most two chunks and all read back.
    #[test]
    fn many_small_strings_share_chunk() {
        let mut arena = StringArena::new();
        let strings: Vec<AzString> = (0..100)
            .map(|i| arena.intern(&format!("s{i}")))
            .collect();
        let (chunks, _bytes) = arena.metrics();
        assert!(chunks <= 2, "expected ≤2 chunks for 100 small strings, got {chunks}");
        for (i, s) in strings.iter().enumerate() {
            assert_eq!(s.as_str(), format!("s{i}"));
        }
    }

    #[test]
    fn clone_deep_copies_and_is_independent() {
        // Cloning an External AzString deep-copies into DefaultRust, so
        // the clone doesn't depend on the arena at all.
        let mut arena = StringArena::new();
        let original = arena.intern("deep-copy test");
        let clone = original.clone();
        drop(original);
        drop(arena);
        assert_eq!(clone.as_str(), "deep-copy test");
    }
}