1
//! XML/HTML parsing module for the Azul toolkit.
2
//!
3
//! Provides two parsing paths:
4
//! - `parse_xml_string`: builds an `XmlNode` tree (used by `domxml_from_str`)
5
//! - `parse_xml_to_fast_dom_with_css`: builds an arena-based `FastDom` directly
6
//!   from XML tokens (used by `parse_xml_to_styled_dom`)
7
//!
8
//! Both paths handle HTML5-lite features: void elements, auto-closing tags,
9
//! XML entity decoding, `<style>` CSS extraction, and BOM/DOCTYPE stripping.
10
//!
11
//! Data types (`XmlNode`, `XmlError`, etc.) live in `azul_core::xml`; this
12
//! module provides the parsing implementations.
13

            
14
#![allow(unused_variables)]
15

            
16
use alloc::{boxed::Box, collections::BTreeMap, string::String, vec::Vec};
17
use core::fmt;
18
#[cfg(feature = "std")]
19
use std::path::Path;
20

            
21
#[cfg(feature = "svg")]
22
pub mod svg;
23

            
24
/// Decodes XML/HTML entities in a string.
25
/// Handles standard XML entities: &lt; &gt; &amp; &apos; &quot;
26
/// and numeric character references: &#60; &#x3C;
27
/// Returns Cow::Borrowed when no entities are found (zero-alloc fast path).
28
87290
fn decode_xml_entities(s: &str) -> std::borrow::Cow<'_, str> {
29
    // Fast path: if no ampersand, no entities to decode
30
87290
    if !s.contains('&') {
31
87290
        return std::borrow::Cow::Borrowed(s);
32
    }
33
    decode_xml_entities_slow(s)
34
87290
}
35

            
36
/// Slow path of `decode_xml_entities`: rebuilds `s` with every recognised
/// entity reference replaced by the character it stands for.
///
/// A reference is `&`, a name made of alphanumeric characters and `#`, and
/// a closing `;`. Recognised names are `lt`, `gt`, `amp`, `apos`, `quot`,
/// `nbsp` and numeric forms (`#NN` decimal, `#xHH` / `#XHH` hexadecimal).
/// Everything else is copied through unchanged: unknown names, numbers that
/// do not parse or are not valid code points, names longer than 10 bytes,
/// and references without a terminating `;`.
///
/// Always returns `Cow::Owned`, even when nothing was replaced.
fn decode_xml_entities_slow(s: &str) -> std::borrow::Cow<'_, str> {
    // Longest name (in bytes, excluding `&` and `;`) that is still scanned.
    const MAX_NAME_BYTES: usize = 10;

    // Maps an entity name (without `&` / `;`) to its character, if known.
    fn resolve(name: &str) -> Option<char> {
        match name {
            "lt" => Some('<'),
            "gt" => Some('>'),
            "amp" => Some('&'),
            "apos" => Some('\''),
            "quot" => Some('"'),
            "nbsp" => Some('\u{00A0}'),
            _ => {
                // Numeric character reference: `#` followed by decimal
                // digits, or by `x` / `X` and hexadecimal digits.
                let digits = name.strip_prefix('#')?;
                let code_point = if digits.starts_with('x') || digits.starts_with('X') {
                    u32::from_str_radix(&digits[1..], 16)
                } else {
                    digits.parse::<u32>()
                };
                code_point.ok().and_then(char::from_u32)
            }
        }
    }

    let mut decoded = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(amp) = rest.find('&') {
        // Literal text in front of the ampersand.
        decoded.push_str(&rest[..amp]);
        let tail = &rest[amp + 1..];

        // Measure the candidate name and note whether a `;` terminates it.
        let mut name_len = 0;
        let mut terminated = false;
        for ch in tail.chars() {
            if ch == ';' {
                terminated = true;
                break;
            }
            if !ch.is_alphanumeric() && ch != '#' {
                break;
            }
            name_len += ch.len_utf8();
            if name_len > MAX_NAME_BYTES {
                // Too long to be an entity; give up on this ampersand.
                break;
            }
        }
        let name = &tail[..name_len];

        if terminated {
            match resolve(name) {
                Some(ch) => decoded.push(ch),
                None => {
                    // Unknown or invalid reference: keep the original text.
                    decoded.push('&');
                    decoded.push_str(name);
                    decoded.push(';');
                }
            }
            // Continue after the `;`.
            rest = &tail[name_len + 1..];
        } else {
            // Not a reference at all: emit what was scanned verbatim.
            decoded.push('&');
            decoded.push_str(name);
            rest = &tail[name_len..];
        }
    }

    decoded.push_str(rest);
    std::borrow::Cow::Owned(decoded)
}
116

            
117
pub use azul_core::xml::*;
118
use azul_core::{dom::Dom, impl_from, styled_dom::StyledDom, window::StringPairVec};
119
#[cfg(feature = "parser")]
120
use azul_css::parser2::CssParseError;
121
use azul_css::{css::Css, AzString, OptionString, U8Vec};
122
use xmlparser::Tokenizer;
123

            
124
/// Parses `xml` and builds a styled DOM from it; never fails.
///
/// The string is parsed with `parse_xml_string` and the resulting nodes are
/// converted with `str_to_dom` using `component_map`. If either step returns
/// an error, the result is instead a body node whose single child is a text
/// node holding the error's `Display` output, styled with an empty stylesheet.
#[cfg(feature = "xml")]
pub fn domxml_from_str(xml: &str, component_map: &ComponentMap) -> DomXml {
    let error_css = Css::empty();

    // Renders an error message as a minimal body-with-text document.
    let error_page = |message: String| {
        let mut dom = Dom::create_body().with_children(vec![Dom::create_text(message)].into());
        DomXml {
            parsed_dom: StyledDom::create(&mut dom, error_css.clone()),
        }
    };

    let nodes = match parse_xml_string(xml) {
        Ok(nodes) => nodes,
        Err(e) => return error_page(format!("{}", e)),
    };

    match str_to_dom(nodes.as_ref(), component_map, None) {
        Ok(parsed_dom) => DomXml { parsed_dom },
        Err(e) => error_page(format!("{}", e)),
    }
}
156

            
157
/// Fastest path: parse XML string directly into FastDom without intermediate XmlNode tree.
158
/// Feeds XML tokenizer events directly into CompactDomBuilder, skipping both the
159
/// XmlNode tree construction AND the Dom tree construction.
160
/// Parse XML string directly into a `FastDom` (arena-based DOM) in a single pass.
161
///
162
/// Also extracts `<style>` tag content as CSS. Returns both the FastDom and
163
/// collected CSS stylesheets. No intermediate `XmlNode` tree is built.
164
///
165
/// This is the fastest XML→DOM path: XML tokens feed directly into
166
/// `CompactDomBuilder`, and `<style>` text is collected inline.
167
pub fn parse_xml_to_fast_dom(xml: &str) -> Result<azul_core::dom::FastDom, XmlError> {
168
    let (fast_dom, _css) = parse_xml_to_fast_dom_with_css(xml)?;
169
    Ok(fast_dom)
170
}
171

            
172
/// Parse XML directly into FastDom + extracted CSS, ready for StyledDom.
173
pub fn parse_xml_to_styled_dom(xml: &str) -> Result<StyledDom, XmlError> {
174
    // Optional per-phase RSS/timing breakdown.
175
    // Gated on AZ_MEM_BREAKDOWN=1 — prints
176
    //   [XML] tokenize+fast_dom       : +XX MiB in YY ms
177
    //   [XML] css attach              : +XX MiB in YY ms
178
    //   [XML] create_from_fast_dom    : +XX MiB in YY ms
179
    // to locate which sub-phase of the parse-cascade dominates the
180
    // RSS jump seen between `page start` and `xml parsed`.
181
    static MEM_ENABLED: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
182
    let mem_on = *MEM_ENABLED.get_or_init(azul_core::profile::memory_enabled);
183

            
184
    let rss0 = if mem_on { peak_rss_bytes() } else { 0 };
185
    let (mut fast_dom, css) = parse_xml_to_fast_dom_with_css(xml)?;
186
    if mem_on {
187
        let rss1 = peak_rss_bytes();
188
        eprintln!(
189
            "[XML] tokenize+fast_dom       : +{:.2} MiB",
190
            (rss1.saturating_sub(rss0)) as f64 / 1024.0 / 1024.0,
191
        );
192
    }
193

            
194
    let rss1 = if mem_on { peak_rss_bytes() } else { 0 };
195
    // Attach CSS to the FastDom
196
    if !css.is_empty() {
197
        let combined_css = Css::new(css.into_iter()
198
            .flat_map(|c| c.rules.into_library_owned_vec())
199
            .collect());
200
        fast_dom.css = vec![azul_core::dom::CssWithNodeId {
201
            node_id: 0, // global scope
202
            css: combined_css,
203
        }].into();
204
    }
205
    if mem_on {
206
        let rss2 = peak_rss_bytes();
207
        eprintln!(
208
            "[XML] css attach              : +{:.2} MiB",
209
            (rss2.saturating_sub(rss1)) as f64 / 1024.0 / 1024.0,
210
        );
211
    }
212

            
213
    // Hint the allocator to return pages freed by the CSS parser.
214
    // The tokenizer+parser created many small allocations (selectors,
215
    // declarations, strings) that are now packed into FastDom. Purging
216
    // here returns those pages before the cascade allocates more.
217
    crate::probe::hint_purge_allocator();
218

            
219
    let rss2 = if mem_on { peak_rss_bytes() } else { 0 };
220
    let styled = StyledDom::create_from_fast_dom(fast_dom);
221

            
222
    // Major purge point: the cascade just freed ~3 MiB of intermediate
223
    // allocations (build-phase Vecs, CSS selector matching state, pruned
224
    // properties). Tell the allocator to return those pages NOW before
225
    // the layout pass allocates more on top of them.
226
    crate::probe::hint_purge_allocator();
227

            
228
    if mem_on {
229
        let rss3 = peak_rss_bytes();
230
        eprintln!(
231
            "[XML] create_from_fast_dom    : +{:.2} MiB",
232
            (rss3.saturating_sub(rss2)) as f64 / 1024.0 / 1024.0,
233
        );
234
    }
235

            
236
    Ok(styled)
237
}
238

            
239
/// Peak resident-set size of the current process in bytes, used for the
/// RSS checkpoints in `parse_xml_to_styled_dom`.
///
/// Reads `ru_maxrss` via `getrusage(RUSAGE_SELF)` from the `libc` crate
/// (only compiled on Unix with the `probe` feature). Returns 0 if the call
/// fails.
#[cfg(all(unix, feature = "probe"))]
fn peak_rss_bytes() -> u64 {
    let mut usage = std::mem::MaybeUninit::<libc::rusage>::zeroed();

    // SAFETY: `usage` points to writable storage of the right size for a
    // `rusage`, which is all `getrusage` requires of its out-parameter.
    let status = unsafe { libc::getrusage(libc::RUSAGE_SELF, usage.as_mut_ptr()) };
    if status != 0 {
        return 0;
    }

    // SAFETY: the storage was zero-initialised (a valid bit pattern for the
    // plain-data `rusage` struct) and `getrusage` reported success.
    let max_rss = unsafe { usage.assume_init() }.ru_maxrss as u64;

    // macOS reports bytes, Linux reports KiB.
    if cfg!(target_os = "macos") {
        max_rss
    } else {
        max_rss.saturating_mul(1024)
    }
}
256

            
257
/// Fallback for builds that are not Unix or lack the `probe` feature: no
/// RSS measurement is compiled in, so this always returns 0 and callers
/// simply see zero-sized deltas.
#[cfg(not(all(unix, feature = "probe")))]
fn peak_rss_bytes() -> u64 {
    // Value reported when no measurement backend is available.
    const UNAVAILABLE: u64 = 0;
    UNAVAILABLE
}
261

            
262
/// Internal: parse XML into FastDom + collected CSS stylesheets.
263
fn parse_xml_to_fast_dom_with_css(xml: &str) -> Result<(azul_core::dom::FastDom, Vec<Css>), XmlError> {
264
    use xmlparser::{ElementEnd::*, Token::*, Tokenizer};
265
    use azul_core::dom::{NodeData, NodeType, IdOrClass, TabIndex};
266
    use azul_core::xml::CompactDomBuilder;
267

            
268
    // Strip BOM
269
    let xml = xml.strip_prefix('\u{FEFF}').unwrap_or(xml);
270
    let mut xml = xml.trim();
271

            
272
    // Skip <?xml ... ?>
273
    if xml.starts_with("<?") {
274
        if let Some(pos) = xml.find("?>") {
275
            xml = &xml[(pos + 2)..];
276
        }
277
    }
278

            
279
    // Skip <!DOCTYPE ...>
280
    let mut xml = xml.trim();
281
    if xml.len() > 9 && xml[..9].to_ascii_lowercase().starts_with("<!doctype") {
282
        if let Some(pos) = xml.find(">") {
283
            xml = &xml[(pos + 1)..];
284
        }
285
    } else if xml.starts_with("<!--") {
286
        if let Some(end) = xml.find("-->") {
287
            xml = &xml[(end + 3)..];
288
            xml = xml.trim();
289
        }
290
    }
291

            
292
    let tokenizer = Tokenizer::from_fragment(xml, 0..xml.len());
293

            
294
    const ESTIMATED_BYTES_PER_NODE: usize = 20;
295
    let estimated_nodes = xml.len() / ESTIMATED_BYTES_PER_NODE;
296
    let mut builder = CompactDomBuilder::with_capacity(estimated_nodes);
297
    let mut collected_css: Vec<Css> = Vec::new();
298
    let mut inside_style_tag = false;
299
    let mut style_text = String::new();
300
    // Track <head> depth: skip DOM nodes inside <head> (still collect <style> CSS).
301
    // This ensures the FastDom contains only <html><body>... as the layout engine expects.
302
    let mut head_depth: usize = 0;
303

            
304
    // Temporary storage for current element's attributes
305
    let mut current_tag: String = String::new();
306
    let mut current_attrs: Vec<(String, String)> = Vec::new();
307
    let mut pending_open = false;
308

            
309
    const VOID_ELEMENTS: &[&str] = &[
310
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta",
311
        "param", "source", "track", "wbr",
312
    ];
313

            
314
    // Pre-compute the CSS key map once (used for style= attribute parsing)
315
    let css_key_map = azul_css::props::property::get_css_key_map();
316

            
317
    // One bump arena for every AzString produced during this parse —
318
    // id/class tokens, text nodes, etc. Replaces ~1k small heap allocs
319
    // with a handful of 64 KiB chunks. Each AzString carries its own
320
    // Arc reference to the arena, so the arena survives until the last
321
    // string is dropped (typically when the StyledDom is dropped).
322
    let mut str_arena = azul_css::corety::StringArena::new();
323

            
324
    // Finalize the pending open element: create NodeData from tag + attrs, push to builder
325
    // tag is already lowercase
326
    let finalize_open = |
327
        builder: &mut CompactDomBuilder,
328
        str_arena: &mut azul_css::corety::StringArena,
329
        tag: &str,
330
        attrs: &[(String, String)],
331
        css_key_map: &azul_css::props::property::CssKeyMap,
332
    | {
333
        let node_type = azul_core::xml::tag_to_node_type(tag);
334
        let mut nd = NodeData::create_node(node_type);
335

            
336
        // Apply attributes — build AttributeTypeVec directly (avoids the
337
        // clone + retain dance in set_ids_and_classes for fresh NodeData).
338
        let mut attr_vec: Vec<azul_core::dom::AttributeType> = Vec::new();
339
        for (key, value) in attrs {
340
            match key.as_str() {
341
                "id" => {
342
                    for id in value.split_whitespace() {
343
                        attr_vec.push(azul_core::dom::AttributeType::Id(str_arena.intern(id)));
344
                    }
345
                }
346
                "class" => {
347
                    for class in value.split_whitespace() {
348
                        attr_vec.push(azul_core::dom::AttributeType::Class(str_arena.intern(class)));
349
                    }
350
                }
351
                "focusable" => {
352
                    if let Some(f) = azul_core::xml::parse_bool(value.as_str()) {
353
                        nd.set_tab_index(if f { TabIndex::Auto } else { TabIndex::NoKeyboardFocus });
354
                    }
355
                }
356
                "tabindex" => {
357
                    if let Ok(ti) = value.parse::<isize>() {
358
                        match ti {
359
                            0 => nd.set_tab_index(TabIndex::Auto),
360
                            i if i > 0 => nd.set_tab_index(TabIndex::OverrideInParent(i as u32)),
361
                            _ => nd.set_tab_index(TabIndex::NoKeyboardFocus),
362
                        }
363
                    }
364
                }
365
                "style" => {
366
                    let mut css_attrs = Vec::new();
367
                    for s in value.split(";") {
368
                        let mut s = s.split(":");
369
                        let key = match s.next() { Some(s) => s, None => continue };
370
                        let val = match s.next() { Some(s) => s, None => continue };
371
                        let _ = azul_css::parser2::parse_css_declaration(
372
                            key.trim(), val.trim(),
373
                            azul_css::parser2::ErrorLocationRange::default(),
374
                            css_key_map, &mut Vec::new(), &mut css_attrs,
375
                        );
376
                    }
377
                    let props = css_attrs.into_iter().filter_map(|s| {
378
                        use azul_css::css::CssDeclaration;
379
                        use azul_css::dynamic_selector::CssPropertyWithConditions;
380
                        match s {
381
                            CssDeclaration::Static(s) => Some(CssPropertyWithConditions::simple(s)),
382
                            _ => None,
383
                        }
384
                    }).collect::<Vec<_>>();
385
                    if !props.is_empty() {
386
                        nd.set_css_props(props.into());
387
                    }
388
                }
389
                "contenteditable" => {
390
                    if azul_core::xml::parse_bool(value.as_str()).unwrap_or(false) {
391
                        nd.set_contenteditable(true);
392
                    }
393
                }
394
                _ => {}
395
            }
396
        }
397
        if !attr_vec.is_empty() {
398
            nd.set_attributes(attr_vec.into());
399
        }
400

            
401
        builder.open_node(nd);
402
    };
403

            
404
    let mut last_was_void = false;
405
    let mut tag_stack: Vec<String> = Vec::new(); // for matching close tags
406

            
407
    // Lowercase `src` into `dst`, reusing `dst`'s existing capacity.
408
    // Zero-alloc when dst's capacity is already ≥ src.len() AND no uppercase
409
    // conversion is needed (the happy path for HTML5 where tags are lowercase).
410
    fn lowercase_into(dst: &mut String, src: &str) {
411
        dst.clear();
412
        if src.bytes().all(|b| !b.is_ascii_uppercase()) {
413
            dst.push_str(src);
414
        } else {
415
            dst.reserve(src.len());
416
            for b in src.bytes() {
417
                dst.push(b.to_ascii_lowercase() as char);
418
            }
419
        }
420
    }
421

            
422
    for token in tokenizer {
423
        let token = token.map_err(|e| XmlError::ParserError(translate_xmlparser_error(e)))?;
424
        match token {
425
            ElementStart { local, .. } => {
426
                // Flush any pending open element
427
                if pending_open {
428
                    let is_void = VOID_ELEMENTS.contains(&current_tag.as_str());
429
                    if current_tag == "head" { head_depth += 1; }
430
                    if head_depth == 0 {
431
                        finalize_open(&mut builder, &mut str_arena, &current_tag, &current_attrs, &css_key_map);
432
                        if is_void { builder.close_node(); }
433
                    }
434
                    if !is_void {
435
                        tag_stack.push(core::mem::take(&mut current_tag));
436
                    }
437
                }
438

            
439
                // Reuse the current_tag buffer — avoids ~1023 fresh String
440
                // allocations per parse (one per ElementStart).
441
                lowercase_into(&mut current_tag, local.as_str());
442
                current_attrs.clear();
443
                pending_open = true;
444
                last_was_void = VOID_ELEMENTS.contains(&current_tag.as_str());
445
            }
446
            Attribute { local, value, .. } => {
447
                // decode_xml_entities returns Cow::Borrowed when no entities
448
                // are present (the common case), so `.into_owned()` is the
449
                // only fresh allocation here. The key is copied via
450
                // `to_string()` because we can't hold a borrow across token
451
                // iterations. TODO: when we switch current_attrs to
452
                // Vec<(&str, Cow<str>)> this becomes zero-alloc for the key.
453
                current_attrs.push((local.to_string(), decode_xml_entities(value.as_str()).into_owned()));
454
            }
455
            ElementEnd { end: Open, .. } => {
456
                if pending_open {
457
                    let is_void = VOID_ELEMENTS.contains(&current_tag.as_str());
458
                    if current_tag == "style" {
459
                        inside_style_tag = true;
460
                        style_text.clear();
461
                    }
462
                    if current_tag == "head" { head_depth += 1; }
463
                    if head_depth == 0 {
464
                        finalize_open(&mut builder, &mut str_arena, &current_tag, &current_attrs, &css_key_map);
465
                        if is_void { builder.close_node(); }
466
                    }
467
                    if !is_void {
468
                        // Use take() instead of clone() — after pending_open=false,
469
                        // current_tag is not read again until the next ElementStart
470
                        // reassigns it via lowercase_into.
471
                        tag_stack.push(core::mem::take(&mut current_tag));
472
                    }
473
                    pending_open = false;
474
                }
475
            }
476
            ElementEnd { end: Empty, .. } => {
477
                // Self-closing element: open + immediately close
478
                if pending_open {
479
                    if current_tag == "head" { head_depth += 1; }
480
                    if head_depth == 0 {
481
                        finalize_open(&mut builder, &mut str_arena, &current_tag, &current_attrs, &css_key_map);
482
                        builder.close_node();
483
                    }
484
                    if current_tag == "head" && head_depth > 0 { head_depth -= 1; }
485
                    pending_open = false;
486
                }
487
            }
488
            ElementEnd { end: Close(_, close_value), .. } => {
489
                if pending_open {
490
                    let is_void = VOID_ELEMENTS.contains(&current_tag.as_str());
491
                    if current_tag == "head" { head_depth += 1; }
492
                    if head_depth == 0 {
493
                        finalize_open(&mut builder, &mut str_arena, &current_tag, &current_attrs, &css_key_map);
494
                        if is_void { builder.close_node(); }
495
                    }
496
                    if !is_void {
497
                        tag_stack.push(core::mem::take(&mut current_tag));
498
                    }
499
                    pending_open = false;
500
                }
501

            
502
                let close_lower = close_value.as_str().to_ascii_lowercase();
503
                let close_str = close_lower.as_str();
504
                if VOID_ELEMENTS.contains(&close_str) {
505
                    continue;
506
                }
507

            
508
                // If closing a <style> tag, parse collected CSS
509
                if close_str == "style" && inside_style_tag {
510
                    if !style_text.is_empty() {
511
                        let parsed_css = Css::from_string(core::mem::take(&mut style_text).into());
512
                        collected_css.push(parsed_css);
513
                    }
514
                    inside_style_tag = false;
515
                }
516

            
517
                // Pop until we find matching tag
518
                while let Some(top) = tag_stack.last() {
519
                    let is_match = top == close_str;
520
                    let was_head = top == "head";
521
                    // Pop this tag (unconditionally auto-close mismatched tags)
522
                    let popped = tag_stack.pop().unwrap();
523
                    if popped == "head" && head_depth > 0 { head_depth -= 1; }
524
                    if head_depth == 0 && !was_head {
525
                        builder.close_node();
526
                    }
527
                    if is_match { break; }
528
                }
529
            }
530
            Text { text } => {
531
                if pending_open {
532
                    let is_void = VOID_ELEMENTS.contains(&current_tag.as_str());
533
                    if current_tag == "style" {
534
                        inside_style_tag = true;
535
                        style_text.clear();
536
                    }
537
                    if current_tag == "head" { head_depth += 1; }
538
                    if head_depth == 0 {
539
                        finalize_open(&mut builder, &mut str_arena, &current_tag, &current_attrs, &css_key_map);
540
                        if is_void { builder.close_node(); }
541
                    }
542
                    if !is_void {
543
                        tag_stack.push(current_tag.clone());
544
                    }
545
                    pending_open = false;
546
                }
547

            
548
                let text_str = text.as_str();
549
                if !text_str.is_empty() {
550
                    if inside_style_tag {
551
                        style_text.push_str(text_str);
552
                    } else if head_depth == 0 {
553
                        // Skip whitespace-only text at <html> level (between </head> and <body>)
554
                        // but keep whitespace inside <body> (it's significant for inline layout)
555
                        let inside_body = tag_stack.iter().any(|t| t == "body");
556
                        if inside_body || !text_str.trim().is_empty() {
557
                            let decoded = decode_xml_entities(text_str);
558
                            builder.add_leaf(NodeData::create_text(str_arena.intern(&decoded)));
559
                        }
560
                    }
561
                }
562
            }
563
            _ => {}
564
        }
565
    }
566

            
567
    // Close any remaining open elements
568
    if pending_open {
569
        finalize_open(&mut builder, &mut str_arena, &current_tag, &current_attrs, &css_key_map);
570
    }
571
    while tag_stack.pop().is_some() {
572
        builder.close_node();
573
    }
574

            
575
    // Drop the arena handle explicitly. AzStrings already embedded in
576
    // the FastDom keep the backing bytes alive via their cloned Arc refs.
577
    drop(str_arena);
578

            
579
    Ok((builder.finish(), collected_css))
580
}
581

            
582
/// Reads the file at `file_path` and builds a DOM from its XML contents.
///
/// **Warning**: the file is re-read from disk on every call — do not use
/// this in release builds!
///
/// This function deliberately never fails. If the file cannot be read, the
/// result is a body node containing a single text node with the message
/// `Error reading: "<path>": <io error>`. Otherwise the contents are passed
/// to `domxml_from_str`.
#[cfg(all(feature = "std", feature = "xml"))]
pub fn domxml_from_file<I: AsRef<Path>>(
    file_path: I,
    component_map: &ComponentMap,
) -> DomXml {
    let error_css = Css::empty();
    let path = file_path.as_ref();

    match std::fs::read_to_string(path) {
        Ok(xml) => domxml_from_str(&xml, component_map),
        Err(e) => {
            // Render the I/O failure instead of returning an error.
            let message = format!(
                "Error reading: \"{}\": {}",
                path.to_string_lossy(),
                e
            );
            let mut dom = Dom::create_body().with_children(vec![Dom::create_text(message)].into());
            DomXml {
                parsed_dom: StyledDom::create(&mut dom, error_css),
            }
        }
    }
}
618

            
619
/// Parses an XML / HTML-like string into a tree of `XmlNodeChild` values.
///
/// The parser builds the tree below an internal, synthetic root node and
/// returns that root's children. Since the input may contain multiple
/// top-level nodes, the result is a `Vec` of "root" nodes, each containing
/// its children recursively.
///
/// Before tokenizing, the following leading items are stripped:
/// - a UTF-8 BOM,
/// - one `<?...?>` declaration / processing instruction,
/// - one `<!DOCTYPE ...>` (matched case-insensitively) or, if there is no
///   DOCTYPE, one `<!-- ... -->` comment.
///
/// Parsing is lenient ("HTML5-lite"):
/// - void elements (`<br>`, `<img>`, ...) never receive children and a stray
///   closing tag for them is ignored,
/// - some elements (`<li>`, `<p>`, `<td>`, ...) are closed implicitly when a
///   conflicting sibling start tag is encountered,
/// - a closing tag closes every still-open element up to and including the
///   nearest open element of the same name; a closing tag without a matching
///   open element is ignored,
/// - all non-empty text nodes are kept, including whitespace-only ones,
/// - entities are decoded in text and attribute values via
///   `decode_xml_entities`.
///
/// # Errors
///
/// - `XmlError::MalformedHierarchy` if the input starts with `<?` but has no
///   `?>`, or starts with `<!DOCTYPE` but has no `>`.
/// - `XmlError::ParserError` if the tokenizer reports an error.
#[cfg(feature = "xml")]
pub fn parse_xml_string(xml: &str) -> Result<Vec<XmlNodeChild>, XmlError> {
    use xmlparser::{ElementEnd::*, Token::*, Tokenizer};

    // ASCII prefix used for the case-insensitive DOCTYPE check.
    const DOCTYPE_PREFIX: &[u8] = b"<!doctype";

    // HTML5-lite parser: List of void elements that should auto-close
    // See: https://developer.mozilla.org/en-US/docs/Glossary/Void_element
    const VOID_ELEMENTS: &[&str] = &[
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param",
        "source", "track", "wbr",
    ];

    // HTML5-lite parser: Elements that auto-close when certain other elements are encountered
    // Format: (element_name, closes_when_encountering)
    const AUTO_CLOSE_RULES: &[(&str, &[&str])] = &[
        // List items close when encountering another list item
        ("li", &["li"]),
        // Table cells / rows
        ("td", &["td", "th", "tr"]),
        ("th", &["td", "th", "tr"]),
        ("tr", &["tr"]),
        // Paragraphs close on block-level elements
        (
            "p",
            &[
                "address",
                "article",
                "aside",
                "blockquote",
                "div",
                "dl",
                "fieldset",
                "footer",
                "form",
                "h1",
                "h2",
                "h3",
                "h4",
                "h5",
                "h6",
                "header",
                "hr",
                "main",
                "nav",
                "ol",
                "p",
                "pre",
                "section",
                "table",
                "ul",
            ],
        ),
        // Option closes on another option or optgroup
        ("option", &["option", "optgroup"]),
        ("optgroup", &["optgroup"]),
        // DD/DT close on each other
        ("dd", &["dd", "dt"]),
        ("dt", &["dd", "dt"]),
    ];

    let mut root_node = XmlNode::default();

    // Strip UTF-8 BOM if present (some W3C test files have it)
    let xml = xml.strip_prefix('\u{FEFF}').unwrap_or(xml);

    // Remove a leading "<? ... ?>" declaration from the XML
    let mut xml = xml.trim();
    if xml.starts_with("<?") {
        let pos = xml.find("?>").ok_or_else(|| {
            XmlError::MalformedHierarchy(azul_core::xml::MalformedHierarchyError {
                expected: "<?xml".into(),
                got: "?>".into(),
            })
        })?;
        xml = &xml[(pos + 2)..];
    }

    // Delete <!DOCTYPE ...> if necessary (case-insensitive).
    //
    // The prefix is compared on raw bytes: slicing the `&str` at a fixed byte
    // offset would panic if that offset fell inside a multi-byte character
    // (e.g. a document that starts with non-ASCII text).
    let mut xml = xml.trim();
    let has_doctype = xml.len() > DOCTYPE_PREFIX.len()
        && xml.as_bytes()[..DOCTYPE_PREFIX.len()].eq_ignore_ascii_case(DOCTYPE_PREFIX);
    if has_doctype {
        let pos = xml.find('>').ok_or_else(|| {
            XmlError::MalformedHierarchy(azul_core::xml::MalformedHierarchyError {
                expected: "<!DOCTYPE".into(),
                got: ">".into(),
            })
        })?;
        xml = &xml[(pos + 1)..];
    } else if xml.starts_with("<!--") {
        // Skip an HTML comment at the start (left untouched if it is unterminated)
        if let Some(end) = xml.find("-->") {
            xml = xml[(end + 3)..].trim();
        }
    }

    let tokenizer = Tokenizer::from_fragment(xml, 0..xml.len());

    // Stack of raw pointers to the currently open elements (index 0 = root).
    // This avoids re-walking the tree from the root on every token.
    //
    // Invariants that make the dereferences below sound:
    // 1. `root_node` is not moved while the stack is in use.
    // 2. Children are only ever appended to the node on top of the stack, so
    //    a node's `children` storage cannot reallocate while one of its
    //    children is still on the stack.
    // 3. At most one reference derived from the stack is alive at a time.
    // 4. The stack never leaves this function.
    let mut node_stack: Vec<*mut XmlNode> = vec![&mut root_node as *mut XmlNode];

    // True while the element on top of the stack is a void element that has
    // been opened but not yet popped.
    let mut last_was_void = false;

    for token in tokenizer {
        let token = token.map_err(|e| XmlError::ParserError(translate_xmlparser_error(e)))?;
        match token {
            ElementStart { local, .. } => {
                let tag_name = local.to_string();
                let is_void_element = VOID_ELEMENTS.contains(&tag_name.as_str());

                // HTML5-lite: If last element was a void element (like <img src="...">),
                // pop it from the hierarchy before processing the new element
                if last_was_void {
                    node_stack.pop();
                    last_was_void = false;
                }

                // HTML5-lite: Check if we need to auto-close the current element
                if node_stack.len() > 1 {
                    // SAFETY: the top-of-stack pointer is valid (see invariants above)
                    let current_element = unsafe { &*node_stack[node_stack.len() - 1] };
                    let current_tag = current_element.node_type.as_str();

                    let should_auto_close = AUTO_CLOSE_RULES.iter().any(|(element, closes_on)| {
                        current_tag == *element && closes_on.contains(&tag_name.as_str())
                    });
                    if should_auto_close {
                        node_stack.pop();
                    }
                }

                if let Some(&current_parent_ptr) = node_stack.last() {
                    // SAFETY: the top-of-stack pointer is valid (see invariants above)
                    let current_parent = unsafe { &mut *current_parent_ptr };

                    current_parent.children.push(XmlNodeChild::Element(XmlNode {
                        node_type: tag_name.into(),
                        attributes: StringPairVec::new().into(),
                        children: Vec::new().into(),
                    }));

                    // The new element becomes the innermost open element
                    let children_len = current_parent.children.len();
                    if let Some(XmlNodeChild::Element(ref mut new_child)) =
                        current_parent.children.as_mut().get_mut(children_len - 1)
                    {
                        node_stack.push(new_child as *mut XmlNode);
                    }

                    last_was_void = is_void_element;
                }
            }
            ElementEnd { end: Empty, .. } => {
                // Self-closing tag (`<foo/>`): close the element that was just opened
                if node_stack.len() > 1 {
                    node_stack.pop();
                }
                last_was_void = false;
            }
            ElementEnd {
                end: Close(_, close_value),
                ..
            } => {
                // HTML5-lite: If last element was a void element, pop it first
                if last_was_void {
                    node_stack.pop();
                    last_was_void = false;
                }

                // Void elements shouldn't have closing tags, but tolerate them
                let close_value_str = close_value.as_str();
                if VOID_ELEMENTS.contains(&close_value_str) {
                    continue;
                }

                // Find the innermost open element with the same name (skip root at index 0)
                let found_idx = (1..node_stack.len()).rev().find(|&i| {
                    // SAFETY: all pointers in the stack are valid (see invariants above)
                    let node = unsafe { &*node_stack[i] };
                    node.node_type.as_str() == close_value_str
                });

                // Close the matching element and everything opened inside it.
                // If no match is found, the closing tag is ignored (lenient HTML parsing).
                if let Some(idx) = found_idx {
                    node_stack.truncate(idx);
                }

                last_was_void = false;
            }
            Attribute { local, value, .. } => {
                if let Some(&last_ptr) = node_stack.last() {
                    // SAFETY: the top-of-stack pointer is valid (see invariants above)
                    let last = unsafe { &mut *last_ptr };
                    // The key is stored exactly as written (no case folding);
                    // XML entities are decoded in the value only.
                    last.attributes.push(azul_core::window::AzStringPair {
                        key: local.to_string().into(),
                        value: azul_css::AzString::from(&*decode_xml_entities(value.as_str())),
                    });
                }
            }
            Text { text } => {
                // HTML5-lite: If last element was a void element, pop it before adding text
                if last_was_void {
                    node_stack.pop();
                    last_was_void = false;
                }

                // IMPORTANT: Preserve ALL text nodes including whitespace-only nodes.
                // Whether whitespace is significant depends on the CSS `white-space`
                // property, which is determined during layout, not during parsing.
                //
                // For example: <pre><span>    </span></pre> must preserve the 4 spaces.
                //
                // Only completely EMPTY text nodes (zero-length strings) are skipped.
                let text_str = text.as_str();

                if !text_str.is_empty() {
                    if let Some(&current_parent_ptr) = node_stack.last() {
                        // SAFETY: the top-of-stack pointer is valid (see invariants above)
                        let current_parent = unsafe { &mut *current_parent_ptr };
                        // Decode XML entities (e.g., &lt; -> <, &gt; -> >, etc.)
                        let decoded_text = decode_xml_entities(text_str);
                        current_parent
                            .children
                            .push(XmlNodeChild::Text(azul_css::AzString::from(&*decoded_text)));
                    }
                }
            }
            // Comments, declarations, `>` of an opening tag, etc. carry no tree information
            _ => {}
        }
    }

    // Clean up: if we ended with a void element, pop it
    if last_was_void {
        node_stack.pop();
    }

    Ok(root_node.children.into())
}
881

            
882
#[cfg(feature = "xml")]
883
pub fn parse_xml(s: &str) -> Result<Xml, XmlError> {
884
    Ok(Xml {
885
        root: parse_xml_string(s)?.into(),
886
    })
887
}
888

            
889
#[cfg(not(feature = "xml"))]
890
pub fn parse_xml(s: &str) -> Result<Xml, XmlError> {
891
    Err(XmlError::NoParserAvailable)
892
}
893

            
894
// to_string(&self) -> String
895

            
896
/// Converts a `roxmltree::ExpandedName` into an owned `XmlQualifiedName`,
/// copying the local name and, if present, the namespace.
#[cfg(feature = "xml")]
pub fn translate_roxmltree_expandedname<'a, 'b>(
    e: roxmltree::ExpandedName<'a, 'b>,
) -> XmlQualifiedName {
    let namespace: Option<AzString> = match e.namespace() {
        Some(uri) => Some(uri.to_string().into()),
        None => None,
    };

    XmlQualifiedName {
        local_name: e.name().to_string().into(),
        namespace: namespace.into(),
    }
}
906

            
907
/// Builds an owned `XmlQualifiedName` from the name and optional namespace
/// of a `roxmltree::Attribute`.
#[cfg(feature = "xml")]
fn translate_roxmltree_attribute(e: roxmltree::Attribute) -> XmlQualifiedName {
    let namespace: Option<AzString> = e.namespace().map(|uri| uri.to_string().into());

    XmlQualifiedName {
        local_name: e.name().to_string().into(),
        namespace: namespace.into(),
    }
}
914

            
915
/// Maps an `xmlparser::StreamError` onto the corresponding `XmlStreamError`
/// variant, converting payloads (characters, byte lists, strings, text
/// positions) into their owned counterparts.
#[cfg(feature = "xml")]
fn translate_xmlparser_streamerror(e: xmlparser::StreamError) -> XmlStreamError {
    use xmlparser::StreamError as Src;

    // Short alias for the position conversion used by every payload variant.
    let at = translate_xmlparser_textpos;

    match e {
        // Variants without payload map one-to-one.
        Src::UnexpectedEndOfStream => XmlStreamError::UnexpectedEndOfStream,
        Src::InvalidName => XmlStreamError::InvalidName,
        Src::InvalidReference => XmlStreamError::InvalidReference,
        Src::InvalidExternalID => XmlStreamError::InvalidExternalID,
        Src::InvalidCommentData => XmlStreamError::InvalidCommentData,
        Src::InvalidCommentEnd => XmlStreamError::InvalidCommentEnd,
        Src::InvalidCharacterData => XmlStreamError::InvalidCharacterData,

        // Variants with payload are repacked into the matching error struct.
        Src::NonXmlChar(ch, tp) => XmlStreamError::NonXmlChar(NonXmlCharError {
            ch: ch.into(),
            pos: at(tp),
        }),
        Src::InvalidChar(expected, got, tp) => XmlStreamError::InvalidChar(InvalidCharError {
            expected,
            got,
            pos: at(tp),
        }),
        Src::InvalidCharMultiple(expected, candidates, tp) => {
            XmlStreamError::InvalidCharMultiple(InvalidCharMultipleError {
                expected,
                got: candidates.to_vec().into(),
                pos: at(tp),
            })
        }
        Src::InvalidQuote(quote, tp) => XmlStreamError::InvalidQuote(InvalidQuoteError {
            got: quote.into(),
            pos: at(tp),
        }),
        Src::InvalidSpace(space, tp) => XmlStreamError::InvalidSpace(InvalidSpaceError {
            got: space.into(),
            pos: at(tp),
        }),
        Src::InvalidString(text, tp) => XmlStreamError::InvalidString(InvalidStringError {
            got: text.to_string().into(),
            pos: at(tp),
        }),
    }
}
963

            
964
/// Maps an `xmlparser::Error` onto the corresponding `XmlParseError` variant.
///
/// Every variant except `UnknownToken` carries a stream error plus a text
/// position; those are converted into an `XmlTextError`. `UnknownToken` only
/// carries a position.
#[cfg(feature = "xml")]
fn translate_xmlparser_error(e: xmlparser::Error) -> XmlParseError {
    use xmlparser::Error as Src;

    // Shared conversion for the (stream error, position) payload.
    let text_error = |cause: xmlparser::StreamError, at: xmlparser::TextPos| XmlTextError {
        stream_error: translate_xmlparser_streamerror(cause),
        pos: translate_xmlparser_textpos(at),
    };

    match e {
        Src::InvalidDeclaration(cause, at) => {
            XmlParseError::InvalidDeclaration(text_error(cause, at))
        }
        Src::InvalidComment(cause, at) => XmlParseError::InvalidComment(text_error(cause, at)),
        Src::InvalidPI(cause, at) => XmlParseError::InvalidPI(text_error(cause, at)),
        Src::InvalidDoctype(cause, at) => XmlParseError::InvalidDoctype(text_error(cause, at)),
        Src::InvalidEntity(cause, at) => XmlParseError::InvalidEntity(text_error(cause, at)),
        Src::InvalidElement(cause, at) => XmlParseError::InvalidElement(text_error(cause, at)),
        Src::InvalidAttribute(cause, at) => XmlParseError::InvalidAttribute(text_error(cause, at)),
        Src::InvalidCdata(cause, at) => XmlParseError::InvalidCdata(text_error(cause, at)),
        Src::InvalidCharData(cause, at) => XmlParseError::InvalidCharData(text_error(cause, at)),
        Src::UnknownToken(at) => XmlParseError::UnknownToken(translate_xmlparser_textpos(at)),
    }
}
/// Maps a `roxmltree::Error` onto the corresponding `XmlError` variant.
///
/// Text positions are converted with `translate_roxml_textpos`. For the
/// character / string variants only the position is kept; the offending
/// characters are dropped. `EntityResolver` has no dedicated counterpart and
/// is reported as `XmlError::UnknownEntityReference`.
#[cfg(feature = "xml")]
pub fn translate_roxmltree_error(e: roxmltree::Error) -> XmlError {
    use roxmltree::Error as Src;

    // Short alias for the position conversion used by almost every arm.
    let at = translate_roxml_textpos;

    match e {
        // Namespace / prefix problems
        Src::InvalidXmlPrefixUri(tp) => XmlError::InvalidXmlPrefixUri(at(tp)),
        Src::UnexpectedXmlUri(tp) => XmlError::UnexpectedXmlUri(at(tp)),
        Src::UnexpectedXmlnsUri(tp) => XmlError::UnexpectedXmlnsUri(at(tp)),
        Src::InvalidElementNamePrefix(tp) => XmlError::InvalidElementNamePrefix(at(tp)),
        Src::DuplicatedNamespace(name, tp) => {
            XmlError::DuplicatedNamespace(DuplicatedNamespaceError {
                ns: name.into(),
                pos: at(tp),
            })
        }
        Src::UnknownNamespace(name, tp) => XmlError::UnknownNamespace(UnknownNamespaceError {
            ns: name.into(),
            pos: at(tp),
        }),

        // Structure problems
        Src::UnexpectedCloseTag(expected, actual, tp) => {
            XmlError::UnexpectedCloseTag(UnexpectedCloseTagError {
                expected: expected.into(),
                actual: actual.into(),
                pos: at(tp),
            })
        }
        Src::UnexpectedEntityCloseTag(tp) => XmlError::UnexpectedEntityCloseTag(at(tp)),

        // Entity problems
        Src::UnknownEntityReference(name, tp) => {
            XmlError::UnknownEntityReference(UnknownEntityReferenceError {
                entity: name.into(),
                pos: at(tp),
            })
        }
        Src::MalformedEntityReference(tp) => XmlError::MalformedEntityReference(at(tp)),
        Src::EntityReferenceLoop(tp) => XmlError::EntityReferenceLoop(at(tp)),

        // Attribute problems
        Src::InvalidAttributeValue(tp) => XmlError::InvalidAttributeValue(at(tp)),
        Src::DuplicatedAttribute(name, tp) => {
            XmlError::DuplicatedAttribute(DuplicatedAttributeError {
                attribute: name.into(),
                pos: at(tp),
            })
        }

        // Document-level problems and limits
        Src::NoRootNode => XmlError::NoRootNode,
        Src::DtdDetected => XmlError::DtdDetected,
        Src::UnclosedRootNode => XmlError::UnclosedRootNode,
        Src::UnexpectedDeclaration(tp) => XmlError::UnexpectedDeclaration(at(tp)),
        Src::NodesLimitReached => XmlError::NodesLimitReached,
        Src::AttributesLimitReached => XmlError::AttributesLimitReached,
        Src::NamespacesLimitReached => XmlError::NamespacesLimitReached,

        // Lexical problems: only the position is carried over
        Src::InvalidName(tp) => XmlError::InvalidName(at(tp)),
        Src::NonXmlChar(_, tp) => XmlError::NonXmlChar(at(tp)),
        Src::InvalidChar(_, _, tp) => XmlError::InvalidChar(at(tp)),
        Src::InvalidChar2(_, _, tp) => XmlError::InvalidChar2(at(tp)),
        Src::InvalidString(_, tp) => XmlError::InvalidString(at(tp)),
        Src::InvalidExternalID(tp) => XmlError::InvalidExternalID(at(tp)),
        Src::InvalidComment(tp) => XmlError::InvalidComment(at(tp)),
        Src::InvalidCharacterData(tp) => XmlError::InvalidCharacterData(at(tp)),
        Src::UnknownToken(tp) => XmlError::UnknownToken(at(tp)),
        Src::UnexpectedEndOfStream => XmlError::UnexpectedEndOfStream,

        // New in roxmltree 0.21: EntityResolver error variant.
        // For now, treat as a generic entity reference error.
        Src::EntityResolver(tp, message) => {
            XmlError::UnknownEntityReference(UnknownEntityReferenceError {
                entity: message.into(),
                pos: at(tp),
            })
        }
    }
}
/// Copies the row / column of an `xmlparser::TextPos` into an `XmlTextPos`.
#[cfg(feature = "xml")]
#[inline(always)]
const fn translate_xmlparser_textpos(o: xmlparser::TextPos) -> XmlTextPos {
    let (row, col) = (o.row, o.col);
    XmlTextPos { row, col }
}
/// Copies the row / column of a `roxmltree::TextPos` into an `XmlTextPos`.
#[cfg(feature = "xml")]
#[inline(always)]
const fn translate_roxml_textpos(o: roxmltree::TextPos) -> XmlTextPos {
    let (row, col) = (o.row, o.col);
    XmlTextPos { row, col }
}
/// Extension trait that adds an XML / XHTML entry point to `Dom`.
///
/// The functionality lives in a trait (rather than an inherent method) to
/// avoid a circular dependency between azul-core and azul-layout.
#[cfg(feature = "xml")]
pub trait DomXmlExt {
    /// Parses an XML / XHTML string and converts it into a `StyledDom`.
    ///
    /// # Arguments
    /// * `xml` - The XML / XHTML source text
    ///
    /// # Returns
    /// The `StyledDom` built from `xml`. The signature has no error channel:
    /// implementations are expected to return a `StyledDom` that displays the
    /// error message when parsing fails.
    fn from_xml_string<S>(xml: S) -> StyledDom
    where
        S: AsRef<str>;
}
#[cfg(feature = "xml")]
impl DomXmlExt for Dom {
    /// Parses `xml` via `domxml_from_str` using the built-in component map
    /// (`ComponentMap::with_builtin`) and returns the resulting styled DOM.
    fn from_xml_string<S: AsRef<str>>(xml: S) -> StyledDom {
        let builtin_components = ComponentMap::with_builtin();
        domxml_from_str(xml.as_ref(), &builtin_components).parsed_dom
    }
}