1
//! Optional fine-grained timing + RSS instrumentation.
2
//!
3
//! Behind the `probe` feature flag every [`Probe::span`] returns a guard
4
//! that records the elapsed wall-clock on `Drop`, and
5
//! [`Probe::sample_rss`] records a labelled RSS checkpoint. Events are
6
//! buffered in a per-thread [`Vec`] and drained by the consumer with
7
//! [`Probe::drain`].
8
//!
9
//! With the feature off every method is a `#[inline(always)]` no-op so
10
//! release builds without the feature pay zero cost.
11
//!
12
//! Consumer (e.g. servo-shot) groups drained events by name to produce
13
//! the per-phase averages / p99s in its trace report.
14

            
15
use core::marker::PhantomData;
16

            
17
// WASM gate: `Instant::now()` panics on browser WASM (no monotonic clock)
18
// and `libc::getrusage` isn't available, so on `target_family = "wasm"`
19
// we drop to the no-op stubs even when the `probe` feature is on.
20
// `AZ_PROFILE=cpu` then prints "(probe unavailable on this target)"
21
// rather than crashing.
22

            
23
// [WEB-LIFT 2026-06-11] `web_lift` also forces the no-op imp: the real
24
// module is Instant::now (mach-time syscall, out-of-image when lifted) +
25
// thread-local pushes + first-access dtor registration (`_tlv_atexit`).
26
// With the TLV emulation in place TLS "works", which flips these from
27
// harmlessly-failing (`try_with` Err) to actually-running — and the
28
// mach/atexit extern calls inside are unliftable. Profiling is
29
// meaningless in lifted wasm; the dylib built with `web-transpiler*`
30
// (which enables `web_lift`) is the web-server build, so desktop
31
// release builds keep real probes.
32
#[cfg(all(
    feature = "probe",
    not(target_family = "wasm"),
    not(feature = "web_lift")
))]
mod imp {
    // Recording backend: events accumulate in a buffer owned by the thread
    // that produced them.
    use std::cell::RefCell;
    use std::time::Instant;

    use super::{Event, EventKind};

    thread_local! {
        static EVENTS: RefCell<Vec<Event>> = const { RefCell::new(Vec::new()) };
    }

    /// Appends one event to the calling thread's buffer.
    ///
    /// Uses `try_with` (not `with`): the lifted-to-wasm web backend has no
    /// real TLS, so `with` would hit `panic_access_error`. When the
    /// thread-local cannot be reached the event is silently dropped.
    #[inline]
    fn record(name: &'static str, kind: EventKind) {
        let _ = EVENTS.try_with(|buf| buf.borrow_mut().push(Event { name, kind }));
    }

    /// Guard for a timed scope; dropping it records `name` together with the
    /// nanoseconds elapsed since `start`.
    pub struct Span {
        pub(crate) name: &'static str,
        pub(crate) start: Instant,
    }

    impl Drop for Span {
        fn drop(&mut self) {
            // Read the clock before touching the buffer.
            let elapsed = self.start.elapsed();
            record(
                self.name,
                EventKind::Span {
                    dur_ns: elapsed.as_nanos() as u64,
                },
            );
        }
    }

    /// Starts timing a span named `name`.
    pub(super) fn open(name: &'static str) -> Span {
        let start = Instant::now();
        Span { name, start }
    }

    /// Records an RSS checkpoint of `bytes` under `label`.
    pub(super) fn sample_rss(label: &'static str, bytes: u64) {
        record(label, EventKind::Rss { bytes });
    }

    /// Moves every buffered event out of the calling thread's buffer.
    /// Returns an empty `Vec` when the thread-local is unreachable.
    pub(super) fn drain() -> Vec<Event> {
        match EVENTS.try_with(|buf| buf.take()) {
            Ok(events) => events,
            Err(_) => Vec::new(),
        }
    }

    /// Clears the calling thread's buffer in place.
    pub(super) fn drop_events() {
        let _ = EVENTS.try_with(|buf| buf.borrow_mut().clear());
    }

    /// Number of events currently buffered on the calling thread
    /// (0 when the thread-local is unreachable).
    pub(super) fn peek_len() -> usize {
        EVENTS
            .try_with(|buf| buf.borrow().len())
            .unwrap_or_default()
    }

    /// This backend records events.
    pub(super) fn enabled() -> bool {
        true
    }
}
99

            
100
#[cfg(any(
101
    not(feature = "probe"),
102
    target_family = "wasm",
103
    feature = "web_lift"
104
))]
105
mod imp {
106
    pub struct Span;
107

            
108
    impl Drop for Span {
109
        #[inline(always)]
110
484352
        fn drop(&mut self) {}
111
    }
112

            
113
    #[inline(always)]
114
484352
    pub(super) fn open(_name: &'static str) -> Span {
115
484352
        Span
116
484352
    }
117

            
118
    #[inline(always)]
119
    pub(super) fn sample_rss(_label: &'static str, _bytes: u64) {}
120

            
121
    #[inline(always)]
122
    pub(super) fn drain() -> Vec<super::Event> {
123
        Vec::new()
124
    }
125

            
126
    #[inline(always)]
127
    pub(super) fn drop_events() {}
128

            
129
    #[inline(always)]
130
    pub(super) fn peek_len() -> usize { 0 }
131

            
132
    #[inline(always)]
133
    pub(super) fn enabled() -> bool {
134
        false
135
    }
136
}
137

            
138
/// Drained probe event. `Vec<Event>` is what consumers walk to render
/// trace summaries; the order is the order events fired in.
///
/// Comparable by value (`PartialEq`/`Eq`) so consumers and tests can assert
/// on drained events directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Event {
    /// Span name or RSS checkpoint label given at the recording site.
    pub name: &'static str,
    /// What was measured for this event.
    pub kind: EventKind,
}

/// Payload carried by an [`Event`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EventKind {
    /// A timed scope's wall-clock duration, in nanoseconds.
    Span { dur_ns: u64 },
    /// A labelled RSS checkpoint, in bytes.
    Rss { bytes: u64 },
}
153

            
154
/// Re-exported guard. Held by the caller of [`Probe::span`].
155
pub use imp::Span;
156

            
157
/// Probe API. All methods are no-ops without the `probe` feature.
158
pub struct Probe {
159
    _no_construct: PhantomData<()>,
160
}
161

            
162
impl Probe {
163
    /// Open a timed span. The returned guard records its name + nanos
164
    /// on drop into the thread-local event buffer.
165
    #[inline(always)]
166
484352
    pub fn span(name: &'static str) -> Span {
167
484352
        imp::open(name)
168
484352
    }
169

            
170
    /// Record an RSS checkpoint with the given label + byte count. The
171
    /// caller supplies the bytes (this module does not depend on
172
    /// platform RSS readers) so consumers can use whatever measurement
173
    /// helper they own.
174
    #[inline(always)]
175
    pub fn sample_rss(label: &'static str, bytes: u64) {
176
        imp::sample_rss(label, bytes);
177
    }
178

            
179
    /// Drain the per-thread event buffer.
180
    #[inline(always)]
181
    pub fn drain() -> Vec<Event> {
182
        imp::drain()
183
    }
184

            
185
    /// Discard the per-thread event buffer without allocating a `Vec` to
186
    /// hand back. Used by long-running harnesses (e.g. `AZ_E2E_TEST`) that
187
    /// want to prevent the thread-local buffer from inflating RSS during
188
    /// thousands of layout passes without actually needing the events.
189
    #[inline(always)]
190
    pub fn drop_events() {
191
        imp::drop_events();
192
    }
193

            
194
    /// Current number of events in the per-thread buffer. Cheap to call.
195
    #[inline(always)]
196
    pub fn peek_len() -> usize {
197
        imp::peek_len()
198
    }
199

            
200
    /// Whether the `probe` feature is compiled in.
201
    #[inline(always)]
202
    pub fn enabled() -> bool {
203
        imp::enabled()
204
    }
205
}
206

            
207
/// Nanoseconds elapsed since this function was first called in the process.
///
/// The first call latches an `Instant` in a process-wide `OnceLock`; every
/// call reports the time elapsed since that instant. Per the original note,
/// this is the same clock `font::parsed::monotonic_now_nanos` uses for LRU
/// stamping, exposed here so callers that want raw nanos without a span
/// guard share one source of truth.
#[inline]
pub fn monotonic_now_nanos() -> u64 {
    use std::{sync::OnceLock, time::Instant};

    static LAUNCH: OnceLock<Instant> = OnceLock::new();

    let origin = *LAUNCH.get_or_init(Instant::now);
    let since_origin = origin.elapsed();
    since_origin.as_nanos() as u64
}
218

            
219
/// Format drained probe events as a per-phase timing table to stderr.
220
///
221
/// Groups `EventKind::Span` by name and prints count / total / avg / p99 /
222
/// max in µs. `EventKind::Rss` checkpoints print in wall-clock order with
223
/// deltas so allocator purges are visible.
224
///
225
/// Sorted by total-ns descending so the slowest phase is on top — ideal
226
/// for spotting which phase spiked during a stuttering frame.
227
///
228
/// Called by `AZ_PROFILE=cpu` dumps (both initial layout and relayout),
229
/// and also by external consumers like `servo-shot --azul-trace`.
230
pub fn print_drained_events(label: &str, events: &[Event]) {
231
    use std::collections::BTreeMap;
232

            
233
    if events.is_empty() {
234
        if !Probe::enabled() {
235
            // Feature absent or target-family disabled (WASM): show "???"
236
            // instead of a misleading "compile with feature=probe" hint.
237
            eprintln!(
238
                "[CPU] {label}: probe unavailable on this target (timings = ???)"
239
            );
240
        } else {
241
            eprintln!("[CPU] {label}: no events recorded this pass");
242
        }
243
        return;
244
    }
245

            
246
    let mut spans: BTreeMap<&'static str, Vec<u64>> = BTreeMap::new();
247
    let mut rss_marks: Vec<(&'static str, u64)> = Vec::new();
248
    for ev in events {
249
        match ev.kind {
250
            EventKind::Span { dur_ns } => spans.entry(ev.name).or_default().push(dur_ns),
251
            EventKind::Rss { bytes } => rss_marks.push((ev.name, bytes)),
252
        }
253
    }
254

            
255
    let mut rows: Vec<(&'static str, usize, u64, u64, u64, u64)> = spans
256
        .into_iter()
257
        .map(|(name, mut ns)| {
258
            ns.sort_unstable();
259
            let n = ns.len();
260
            let total: u128 = ns.iter().map(|&x| x as u128).sum();
261
            let avg = (total / n.max(1) as u128) as u64;
262
            let p99 = ns[(n.saturating_sub(1) * 99) / 100];
263
            let max = *ns.last().unwrap();
264
            (name, n, total as u64, avg, p99, max)
265
        })
266
        .collect();
267
    rows.sort_by(|a, b| b.2.cmp(&a.2));
268

            
269
    eprintln!("[CPU] === {label} ({} phases) ===", rows.len());
270
    eprintln!(
271
        "[CPU] {:<28}  {:>5}  {:>10}  {:>9}  {:>9}  {:>9}",
272
        "phase", "n", "total(µs)", "avg(µs)", "p99(µs)", "max(µs)"
273
    );
274
    for (name, n, total, avg, p99, max) in &rows {
275
        eprintln!(
276
            "[CPU] {:<28}  {:>5}  {:>10.1}  {:>9.2}  {:>9.2}  {:>9.2}",
277
            name,
278
            n,
279
            (*total as f64) / 1_000.0,
280
            (*avg as f64) / 1_000.0,
281
            (*p99 as f64) / 1_000.0,
282
            (*max as f64) / 1_000.0,
283
        );
284
    }
285
    if !rss_marks.is_empty() {
286
        eprintln!("[CPU]   -- RSS checkpoints (wall-clock order) --");
287
        let mut prev: Option<u64> = None;
288
        for (lbl, bytes) in &rss_marks {
289
            let delta = prev
290
                .map(|p| {
291
                    let diff = *bytes as i128 - p as i128;
292
                    if diff >= 0 {
293
                        format!("  (Δ +{:.2} MiB)", diff as f64 / 1048576.0)
294
                    } else {
295
                        format!("  (Δ -{:.2} MiB)", -diff as f64 / 1048576.0)
296
                    }
297
                })
298
                .unwrap_or_default();
299
            eprintln!(
300
                "[CPU]   {:<28}  {:.2} MiB{}",
301
                lbl,
302
                *bytes as f64 / 1048576.0,
303
                delta
304
            );
305
            prev = Some(*bytes);
306
        }
307
    }
308
}
309

            
310
/// Convenience wrapper: measure the process's memory and push it into the
/// probe event buffer under `label`.
///
/// The value recorded is the first element of `current_rss_bytes()` when
/// that is non-zero; otherwise it falls back to `peak_rss_bytes_self()`.
/// Preferring the current figure over `getrusage.ru_maxrss` is what makes
/// allocator purges visible, since a peak only moves up. The name
/// `sample_peak_rss` is kept for backwards compatibility with existing
/// checkpoint labels.
///
/// Does nothing when the `probe` feature is off or `web_lift` is on.
#[inline]
pub fn sample_peak_rss(label: &'static str) {
    // No-op build: just mark the argument as used.
    #[cfg(any(not(feature = "probe"), feature = "web_lift"))]
    let _ = label;

    // [WEB-LIFT 2026-06-11] skipped under web_lift: the readers below are
    // mach syscalls (task_info/getrusage), out-of-image and unliftable.
    #[cfg(all(feature = "probe", not(feature = "web_lift")))]
    {
        let bytes = match current_rss_bytes() {
            (0, _) => peak_rss_bytes_self(),
            (current, _) => current,
        };
        Probe::sample_rss(label, bytes);
    }
}
332

            
333
/// Public entry point for the private peak-RSS reader: returns whatever
/// `peak_rss_bytes_self()` reports.
#[cfg(feature = "probe")]
pub fn peak_rss_bytes_pub() -> u64 {
    peak_rss_bytes_self()
}
335

            
336
/// Peak resident set size of this process, in bytes, from
/// `getrusage(RUSAGE_SELF)`.
///
/// `ru_maxrss` is used as-is on macOS and multiplied by 1024 on every other
/// unix target. Returns 0 when `getrusage` fails or on non-unix targets.
#[cfg(feature = "probe")]
fn peak_rss_bytes_self() -> u64 {
    #[cfg(unix)]
    {
        // SAFETY: `rusage` is a plain C struct for which all-zero bytes are a
        // valid value, and `getrusage` only writes through the pointer it is
        // handed, which refers to a live local.
        let usage = unsafe {
            let mut ru: libc::rusage = core::mem::zeroed();
            if libc::getrusage(libc::RUSAGE_SELF, &mut ru) != 0 {
                return 0;
            }
            ru
        };
        let reported = usage.ru_maxrss as u64;
        if cfg!(target_os = "macos") {
            reported
        } else {
            reported.saturating_mul(1024)
        }
    }
    #[cfg(not(unix))]
    {
        0
    }
}
352

            
353
/// Ask the active global allocator to return freed pages to the OS.
///
/// Exactly one strategy is compiled in:
///
/// - `allocator_mimalloc`: calls `mi_collect(true)` (matches
///   `az_purge_allocator` in azul-dll). When the `probe` feature is also on
///   and `azul_core::profile::memory_enabled()` reports true, a `[PURGE]`
///   line with the current RSS is printed to stderr.
/// - `allocator_jemalloc` (without mimalloc): calls
///   `mallctl("arena.4096.purge")`, i.e. `arena.<i>.purge` with
///   `i = MALLCTL_ARENAS_ALL`.
/// - Neither feature, on macOS (not under Miri): falls back to
///   `malloc_zone_pressure_relief` on the system zone.
/// - Anything else: no-op.
///
/// Call after major allocations are freed (e.g. after a layout pass).
#[inline]
pub fn hint_purge_allocator() {
    #[cfg(feature = "allocator_mimalloc")]
    {
        // Aggressive purge — returns arenas to the OS when possible.
        // SAFETY: `mi_collect` takes a plain flag and no pointers.
        unsafe {
            libmimalloc_sys::mi_collect(true);
        }
        // The RSS trace depends on `current_rss_bytes`, which only exists
        // with the `probe` feature; gate it so mimalloc-without-probe builds.
        #[cfg(feature = "probe")]
        {
            static PURGE_TRACE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
            if *PURGE_TRACE.get_or_init(azul_core::profile::memory_enabled) {
                let (rss, _) = current_rss_bytes();
                eprintln!("[PURGE] mi_collect(true) called — current rss={:.2} MiB", rss as f64 / 1048576.0);
            }
        }
    }
    // mimalloc wins when both allocator features are enabled.
    #[cfg(all(feature = "allocator_jemalloc", not(feature = "allocator_mimalloc")))]
    {
        // Purge all arenas. `arena.<i>.purge` with i = MALLCTL_ARENAS_ALL.
        // SAFETY: the name is a NUL-terminated byte string; null old/new
        // pointers with a zero length mean no value is read or written.
        unsafe {
            let _ = tikv_jemalloc_sys::mallctl(
                b"arena.4096.purge\0".as_ptr() as *const _,
                core::ptr::null_mut(),
                core::ptr::null_mut(),
                core::ptr::null_mut(),
                0,
            );
        }
    }
    #[cfg(all(target_os = "macos", not(miri), not(any(feature = "allocator_mimalloc", feature = "allocator_jemalloc"))))]
    {
        extern "C" {
            fn malloc_zone_pressure_relief(zone: *mut core::ffi::c_void, goal: usize) -> usize;
        }
        // SAFETY: called with a null zone and a goal of 0, exactly as before;
        // no Rust memory is handed to the callee.
        unsafe {
            malloc_zone_pressure_relief(core::ptr::null_mut(), 0);
        }
    }
}
403

            
404
/// Sample the process's current memory footprint (not peak).
///
/// Returns `(footprint_bytes, virtual_bytes)`.
///
/// On macOS the footprint is `phys_footprint` from `TASK_VM_INFO` (via
/// [`phys_footprint_bytes`]) — the figure Activity Monitor's "Memory" column
/// and `vmmap`'s "Physical footprint" line show. If that reads as 0,
/// `resident_size` from `MACH_TASK_BASIC_INFO` is used instead. If the
/// `MACH_TASK_BASIC_INFO` query itself fails, the result is
/// `(phys_footprint, 0)`.
///
/// On every other target, and under Miri, this returns `(0, 0)`: no Linux
/// (`/proc`) reader is implemented here, so callers that need a number
/// there must fall back to another source (as `sample_peak_rss` does with
/// peak RSS).
///
/// More useful than `getrusage.ru_maxrss`, which only moves upward.
#[cfg(feature = "probe")]
pub fn current_rss_bytes() -> (u64, u64) {
    #[cfg(all(target_os = "macos", not(miri)))]
    {
        // Prefer phys_footprint (TASK_VM_INFO). Fall back to
        // resident_size (MACH_TASK_BASIC_INFO) if the bigger struct
        // isn't populated for some reason.
        let pf = phys_footprint_bytes();
        #[repr(C)]
        struct MachTaskBasicInfo {
            virtual_size: u64,
            resident_size: u64,
            resident_size_max: u64,
            user_time: [u32; 2],
            system_time: [u32; 2],
            policy: i32,
            suspend_count: i32,
        }
        const MACH_TASK_BASIC_INFO: u32 = 20;
        extern "C" {
            fn mach_task_self() -> u32;
            fn task_info(
                target: u32, flavor: u32,
                info: *mut core::ffi::c_void, count: *mut u32,
            ) -> i32;
        }
        // SAFETY: `info` is a zero-initialised plain-data struct and `count`
        // is its size in 32-bit words, so the callee is told exactly how
        // much of the local it may write.
        unsafe {
            let mut info: MachTaskBasicInfo = core::mem::zeroed();
            let mut count = (core::mem::size_of::<MachTaskBasicInfo>() / 4) as u32;
            let kr = task_info(
                mach_task_self(),
                MACH_TASK_BASIC_INFO,
                &mut info as *mut _ as *mut core::ffi::c_void,
                &mut count,
            );
            if kr == 0 {
                let rss = if pf != 0 { pf } else { info.resident_size };
                (rss, info.virtual_size)
            } else {
                (pf, 0)
            }
        }
    }
    // Miri cannot call the mach `task_info` foreign function, and memory
    // profiling is meaningless there; non-macOS targets have no reader.
    #[cfg(any(not(target_os = "macos"), miri))]
    {
        (0, 0)
    }
}
463

            
464
/// Heap bytes currently held by the libc allocator (`mstats.bytes_used`).
///
/// Unlike RSS, this is what *Rust* allocations plus anything else going
/// through the default malloc zone is actually holding — mmap regions
/// for thread stacks, GL buffers, file-mapped fonts, etc. are NOT counted.
/// A leak that shows up here points to a genuine heap retention (an Arc
/// chain never dropped, a Vec never shrunk, a `Box<T>` forgotten).
///
/// Returns 0 on non-macOS targets and under Miri, which cannot call the
/// `mstats` foreign function.
#[cfg(feature = "probe")]
pub fn malloc_heap_bytes() -> u64 {
    #[cfg(all(target_os = "macos", not(miri)))]
    {
        #[repr(C)]
        struct Mstats {
            bytes_total: usize,
            chunks_used: usize,
            bytes_used: usize,
            chunks_free: usize,
            bytes_free: usize,
        }
        extern "C" {
            fn mstats() -> Mstats;
        }
        // SAFETY: `mstats` takes no arguments and returns the struct by
        // value; nothing from Rust is passed in.
        unsafe { mstats().bytes_used as u64 }
    }
    #[cfg(any(not(target_os = "macos"), miri))]
    {
        0
    }
}
492

            
493
/// Reads the Mach `phys_footprint` counter for this process.
///
/// This is the number Activity Monitor and `vmmap`'s "Physical footprint"
/// line display. Per the original note it leaves out shared library text
/// pages and other kernel-mapped regions that inflate `resident_size`, so
/// for a short-lived headless render it is the more honest figure (a run
/// with ~20 MiB `ru_maxrss` typically shows ~8 MiB here).
///
/// Returns 0 on non-macOS targets, under Miri, or when `task_info` fails.
///
/// Only the current value is read; track the maximum across calls in
/// application code if a peak is needed.
#[cfg(feature = "probe")]
pub fn phys_footprint_bytes() -> u64 {
    #[cfg(all(target_os = "macos", not(miri)))]
    {
        // Prefix of the TASK_VM_INFO struct (layout per
        // osfmk/mach/task_info.h). `phys_footprint` sits after a long run of
        // counters, so all preceding fields must be declared to reach it.
        #[repr(C)]
        struct TaskVmInfo {
            virtual_size: u64,
            region_count: u32,
            page_size: u32,
            resident_size: u64,
            resident_size_peak: u64,
            device: u64,
            device_peak: u64,
            internal: u64,
            internal_peak: u64,
            external: u64,
            external_peak: u64,
            reusable: u64,
            reusable_peak: u64,
            purgeable_volatile_pmap: u64,
            purgeable_volatile_resident: u64,
            purgeable_volatile_virtual: u64,
            compressed: u64,
            compressed_peak: u64,
            compressed_lifetime: u64,
            phys_footprint: u64,
            // Trailing fields we never read; reserved as padding.
            _rest: [u64; 12],
        }
        const TASK_VM_INFO: u32 = 22;
        extern "C" {
            fn mach_task_self() -> u32;
            fn task_info(
                target: u32, flavor: u32,
                info: *mut core::ffi::c_void, count: *mut u32,
            ) -> i32;
        }
        // SAFETY: `vm` is a zero-initialised plain-data struct and
        // `word_count` is its size in 32-bit words, so the callee is told
        // exactly how much of the local it may write.
        unsafe {
            let mut vm: TaskVmInfo = core::mem::zeroed();
            let mut word_count = (core::mem::size_of::<TaskVmInfo>() / 4) as u32;
            let status = task_info(
                mach_task_self(),
                TASK_VM_INFO,
                &mut vm as *mut _ as *mut core::ffi::c_void,
                &mut word_count,
            );
            if status != 0 {
                0
            } else {
                vm.phys_footprint
            }
        }
    }
    // Miri cannot call the mach `task_info` foreign function; other targets
    // have no such counter.
    #[cfg(any(not(target_os = "macos"), miri))]
    {
        0
    }
}
562

            
563
/// Background sampler for peak phys_footprint. Spawns a thread that
/// polls `phys_footprint_bytes()` every ~2 ms and raises a shared
/// atomic high-water mark. Per the original note the kernel exposes no
/// direct "peak phys_footprint", so polling is the way to catch
/// mid-phase transients that are MADV_FREE'd before the next explicit
/// sample point.
///
/// Not started by default; call `start_peak_sampler()` once at
/// process init if you want peak tracking. The original estimate puts the
/// overhead at ~1-5 µs per poll on macOS, i.e. <0.25% of one core at
/// 500 Hz. `peak_phys_footprint_seen()` reads the current high-water mark.
///
/// Idempotent: only the first successful call spawns the thread. If the
/// spawn fails the started flag is cleared again so a later call can
/// retry. Does nothing on non-macOS targets.
#[cfg(feature = "probe")]
pub fn start_peak_sampler() {
    #[cfg(target_os = "macos")]
    {
        use std::sync::atomic::{AtomicBool, Ordering};
        use std::time::Duration;

        // 2 ms between polls = the documented 500 Hz sampling rate.
        const POLL_INTERVAL: Duration = Duration::from_millis(2);

        static STARTED: AtomicBool = AtomicBool::new(false);
        if STARTED.swap(true, Ordering::AcqRel) {
            return;
        }
        let spawned = std::thread::Builder::new()
            .name("azul-peak-sampler".to_string())
            .spawn(|| loop {
                // Atomic max: never lowers the mark, and cannot lose an
                // update between a separate load and store.
                PEAK_PHYS_FOOTPRINT.fetch_max(phys_footprint_bytes(), Ordering::Relaxed);
                std::thread::sleep(POLL_INTERVAL);
            });
        if spawned.is_err() {
            // Best effort: stay silent, but let a later call try again.
            STARTED.store(false, Ordering::Release);
        }
    }
}
598

            
599
/// High-water mark of `phys_footprint`, in bytes. Starts at 0; written by
/// the background sampler and by `reset_peak`.
#[cfg(feature = "probe")]
static PEAK_PHYS_FOOTPRINT: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
602

            
603
/// Current value of the `phys_footprint` high-water mark, in bytes.
///
/// Stays 0 until something writes the mark (`start_peak_sampler`'s thread
/// or `reset_peak`).
#[cfg(feature = "probe")]
pub fn peak_phys_footprint_seen() -> u64 {
    use std::sync::atomic::Ordering::Relaxed;

    PEAK_PHYS_FOOTPRINT.load(Relaxed)
}
609

            
610
/// Overwrites the global high-water mark with the current phys_footprint.
///
/// Use together with `peak_phys_footprint_seen()` to measure "peak during
/// phase X": call `reset_peak()` on phase entry and read the mark on phase
/// exit. The background sampler, if started, keeps running either way.
#[cfg(feature = "probe")]
pub fn reset_peak() {
    use std::sync::atomic::Ordering;

    PEAK_PHYS_FOOTPRINT.store(phys_footprint_bytes(), Ordering::Relaxed);
}
620

            
621
/// Pushes the current high-water mark into the probe event stream as an
/// RSS-kind event named `label`.
///
/// Intended for phase exit, after `reset_peak()` was called at phase entry,
/// so that `bytes` is the peak seen during the phase.
#[cfg(feature = "probe")]
#[inline]
pub fn sample_phase_peak(label: &'static str) {
    use std::sync::atomic::Ordering;

    let high_water = PEAK_PHYS_FOOTPRINT.load(Ordering::Relaxed);
    Probe::sample_rss(label, high_water)
}
630

            
631
/// No-op stand-in for `reset_peak` when the `probe` feature is off.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn reset_peak() {
    // Nothing is tracked without `probe`.
}
634

            
635
/// No-op stand-in for `sample_phase_peak` when the `probe` feature is off;
/// the label is ignored.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn sample_phase_peak(_label: &'static str) {
    // Nothing is recorded without `probe`.
}
638

            
639
/// Stand-in for `malloc_heap_bytes` when the `probe` feature is off:
/// always reports 0 bytes.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn malloc_heap_bytes() -> u64 {
    0
}
642

            
643
/// Appends one `{"ev":"phase","call":C,"label":L,"heap":N}` line to the
/// JSONL file returned by `azul_core::profile::out_path()` (the
/// `AZ_PROFILE_OUT=<path>` file). Only fires when both the `heap` and
/// `jsonl` profile flags are set *and* a path is available.
///
/// `call` groups the phases of one `regenerate_layout` invocation: a
/// process-wide counter advances each time `label` is exactly `"start"`,
/// and every other label reuses the id of the most recent `"start"`.
///
/// `label` convention: `start` at function entry; `<step>` after each
/// phase completes; `end` at function exit. Heap Δ between adjacent
/// labels within the same call id is the bytes retained by that phase.
///
/// `label` is written verbatim (not JSON-escaped). Failures to open or
/// write the file are ignored. With the `probe` feature off this function
/// is replaced by a no-op stub.
#[cfg(feature = "probe")]
pub fn emit_phase_heap(label: &str) {
    use std::io::Write;
    use std::sync::atomic::{AtomicU64, Ordering};

    // Source of fresh ids, bumped on every "start".
    static CALL_ID: AtomicU64 = AtomicU64::new(0);
    // Id shared by all labels between one "start" and the next.
    static CURRENT_CALL: AtomicU64 = AtomicU64::new(0);

    if !heap_jsonl_enabled() {
        return;
    }
    let Some(path) = azul_core::profile::out_path() else {
        return;
    };

    let call_id = match label {
        "start" => {
            let fresh = CALL_ID.fetch_add(1, Ordering::Relaxed) + 1;
            CURRENT_CALL.store(fresh, Ordering::Relaxed);
            fresh
        }
        _ => CURRENT_CALL.load(Ordering::Relaxed),
    };
    let heap = malloc_heap_bytes();

    let mut options = std::fs::OpenOptions::new();
    options.create(true).append(true);
    let Ok(mut file) = options.open(path) else {
        return;
    };
    let _ = writeln!(
        file,
        r#"{{"ev":"phase","call":{},"label":"{}","heap":{}}}"#,
        call_id, label, heap
    );
}
689

            
690
/// No-op stand-in for `emit_phase_heap` when the `probe` feature is off;
/// the label is ignored and nothing is written.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn emit_phase_heap(_label: &str) {
    // Nothing is emitted without `probe`.
}
693

            
694
/// Variant of [`emit_phase_heap`] that carries a numeric payload (e.g. a
/// cache size) in the record's `"extra"` field.
///
/// Requires the `heap` and `jsonl` flags plus
/// `azul_core::profile::detail_enabled()` (`AZ_PROFILE=heap,jsonl,detail`)
/// and an output path; otherwise returns without writing. The `detail`
/// token opts in to these finer per-step records; without it only the
/// coarser [`emit_phase_heap`] records fire.
///
/// The record's `"call"` field is always written as `0`, and `label` is
/// written verbatim (not JSON-escaped). Failures to open or write the file
/// are ignored.
#[cfg(feature = "probe")]
pub fn emit_phase_heap_extra(label: &str, extra: u64) {
    use std::io::Write;

    // Checked in this order; the second test only runs if the first passes.
    if !heap_jsonl_enabled() || !azul_core::profile::detail_enabled() {
        return;
    }
    let Some(path) = azul_core::profile::out_path() else {
        return;
    };
    let heap = malloc_heap_bytes();

    let mut options = std::fs::OpenOptions::new();
    options.create(true).append(true);
    let Ok(mut file) = options.open(path) else {
        return;
    };
    let _ = writeln!(
        file,
        r#"{{"ev":"phase","call":0,"label":"{}","heap":{},"extra":{}}}"#,
        label, heap, extra
    );
}
720

            
721
/// No-op stand-in for `emit_phase_heap_extra` when the `probe` feature is
/// off; both arguments are ignored and nothing is written.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn emit_phase_heap_extra(_label: &str, _extra: u64) {
    // Nothing is emitted without `probe`.
}
724

            
725
/// `true` only when both the `heap` and `jsonl` flags reported by
/// `azul_core::profile::flags()` are set — the combination that enables
/// JSONL heap-probe emission. Either flag alone is not enough.
#[cfg(feature = "probe")]
#[inline]
fn heap_jsonl_enabled() -> bool {
    let profile = azul_core::profile::flags();
    if !profile.heap {
        return false;
    }
    profile.jsonl
}
733

            
734
/// Forwards to `azul_core::profile::detail_enabled()`, i.e. whether the
/// `detail` token of `AZ_PROFILE` is active.
///
/// Exposed here so downstream crates can call
/// `azul_layout::probe::detail_enabled()` without depending on
/// `azul_core::profile` themselves.
#[cfg(feature = "probe")]
#[inline]
pub fn detail_enabled() -> bool {
    let active = azul_core::profile::detail_enabled();
    active
}
742

            
743
/// Stand-in for `detail_enabled` when the `probe` feature is off: detail
/// probes are never enabled.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn detail_enabled() -> bool {
    false
}