//! Optional fine-grained timing + RSS instrumentation.
//!
//! Behind the `probe` feature flag every [`Probe::span`] returns a guard
//! that records the elapsed wall-clock time on `Drop`, and
//! [`Probe::sample_rss`] records a labelled RSS checkpoint. Events are
//! buffered in a per-thread [`Vec`] and drained by the consumer with
//! [`Probe::drain`].
//!
//! With the feature off every method is a `#[inline(always)]` no-op so
//! release builds without the feature pay zero cost.
//!
//! Consumers (e.g. servo-shot) group drained events by name to produce
//! the per-phase averages / p99s in their trace reports.

            
15
use core::marker::PhantomData;
16

            
17
// WASM gate: `Instant::now()` panics on browser WASM (no monotonic clock)
// and `libc::getrusage` isn't available, so on `target_family = "wasm"`
// we drop to the no-op stubs even when the `probe` feature is on.
// `AZ_PROFILE=cpu` then prints "probe unavailable on this target"
// rather than crashing.
22

            
23
/// Real implementation: compiled only when the `probe` feature is on and
/// the target is not WASM. Events accumulate in a thread-local buffer.
#[cfg(all(feature = "probe", not(target_family = "wasm")))]
mod imp {
    use std::cell::RefCell;
    use std::time::Instant;

    use super::{Event, EventKind};

    thread_local! {
        /// Per-thread buffer of recorded events, in firing order.
        static EVENTS: RefCell<Vec<Event>> = const { RefCell::new(Vec::new()) };
    }

    /// Append one event to this thread's buffer.
    ///
    /// Uses `try_with` (not `with`): the lifted-to-wasm web backend has no
    /// real TLS, so `with` hits panic_access_error. The probe accesses are
    /// inlined into layout_dom_recursive/layout_document, so they can't be
    /// stubbed at the symbol level — the non-panicking access is used and
    /// a failed access silently drops the event.
    #[inline]
    fn record(event: Event) {
        let _ = EVENTS.try_with(|buffer| buffer.borrow_mut().push(event));
    }

    /// Guard for a timed scope: on drop it pushes a `Span` event carrying
    /// its name and the nanoseconds elapsed since it was opened.
    pub struct Span {
        pub(crate) name: &'static str,
        pub(crate) start: Instant,
    }

    impl Drop for Span {
        fn drop(&mut self) {
            let elapsed = self.start.elapsed();
            record(Event {
                name: self.name,
                kind: EventKind::Span {
                    dur_ns: elapsed.as_nanos() as u64,
                },
            });
        }
    }

    /// Start timing a scope named `name`.
    pub(super) fn open(name: &'static str) -> Span {
        let start = Instant::now();
        Span { name, start }
    }

    /// Push an RSS checkpoint (`bytes`) under `label`.
    pub(super) fn sample_rss(label: &'static str, bytes: u64) {
        record(Event {
            name: label,
            kind: EventKind::Rss { bytes },
        });
    }

    /// Take every buffered event, leaving an empty buffer behind. Yields
    /// an empty `Vec` when the thread-local is not accessible.
    pub(super) fn drain() -> Vec<Event> {
        match EVENTS.try_with(|buffer| buffer.take()) {
            Ok(events) => events,
            Err(_) => Vec::new(),
        }
    }

    /// Forget every buffered event without returning them.
    pub(super) fn drop_events() {
        let _ = EVENTS.try_with(|buffer| {
            buffer.borrow_mut().clear();
        });
    }

    /// Number of events currently buffered on this thread (0 when the
    /// thread-local is not accessible).
    pub(super) fn peek_len() -> usize {
        EVENTS
            .try_with(|buffer| buffer.borrow().len())
            .unwrap_or_default()
    }

    /// This implementation is the live one.
    pub(super) fn enabled() -> bool {
        true
    }
}
86

            
87
#[cfg(any(not(feature = "probe"), target_family = "wasm"))]
88
mod imp {
89
    pub struct Span;
90

            
91
    impl Drop for Span {
92
        #[inline(always)]
93
246540
        fn drop(&mut self) {}
94
    }
95

            
96
    #[inline(always)]
97
246540
    pub(super) fn open(_name: &'static str) -> Span {
98
246540
        Span
99
246540
    }
100

            
101
    #[inline(always)]
102
    pub(super) fn sample_rss(_label: &'static str, _bytes: u64) {}
103

            
104
    #[inline(always)]
105
    pub(super) fn drain() -> Vec<super::Event> {
106
        Vec::new()
107
    }
108

            
109
    #[inline(always)]
110
    pub(super) fn drop_events() {}
111

            
112
    #[inline(always)]
113
    pub(super) fn peek_len() -> usize { 0 }
114

            
115
    #[inline(always)]
116
    pub(super) fn enabled() -> bool {
117
        false
118
    }
119
}
120

            
121
/// Drained probe event. `Vec<Event>` is what consumers walk to render
/// trace summaries; the order is the order events fired in.
///
/// Both fields are plain data (`&'static str` + integers), so the type is
/// `Copy` and comparable, which keeps consumers and tests free of clones.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Event {
    /// Span name or RSS checkpoint label supplied by the caller.
    pub name: &'static str,
    /// What was measured for this event.
    pub kind: EventKind,
}

/// Payload of an [`Event`]: either a timed scope or a memory checkpoint.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EventKind {
    /// A timed scope's wall-clock duration, in nanoseconds.
    Span { dur_ns: u64 },
    /// A labelled RSS checkpoint, in bytes.
    Rss { bytes: u64 },
}
136

            
137
/// Re-exported guard. Held by the caller of [`Probe::span`].
138
pub use imp::Span;
139

            
140
/// Probe API. All methods are no-ops without the `probe` feature.
141
pub struct Probe {
142
    _no_construct: PhantomData<()>,
143
}
144

            
145
impl Probe {
146
    /// Open a timed span. The returned guard records its name + nanos
147
    /// on drop into the thread-local event buffer.
148
    #[inline(always)]
149
246540
    pub fn span(name: &'static str) -> Span {
150
246540
        imp::open(name)
151
246540
    }
152

            
153
    /// Record an RSS checkpoint with the given label + byte count. The
154
    /// caller supplies the bytes (this module does not depend on
155
    /// platform RSS readers) so consumers can use whatever measurement
156
    /// helper they own.
157
    #[inline(always)]
158
    pub fn sample_rss(label: &'static str, bytes: u64) {
159
        imp::sample_rss(label, bytes);
160
    }
161

            
162
    /// Drain the per-thread event buffer.
163
    #[inline(always)]
164
    pub fn drain() -> Vec<Event> {
165
        imp::drain()
166
    }
167

            
168
    /// Discard the per-thread event buffer without allocating a `Vec` to
169
    /// hand back. Used by long-running harnesses (e.g. `AZ_E2E_TEST`) that
170
    /// want to prevent the thread-local buffer from inflating RSS during
171
    /// thousands of layout passes without actually needing the events.
172
    #[inline(always)]
173
    pub fn drop_events() {
174
        imp::drop_events();
175
    }
176

            
177
    /// Current number of events in the per-thread buffer. Cheap to call.
178
    #[inline(always)]
179
    pub fn peek_len() -> usize {
180
        imp::peek_len()
181
    }
182

            
183
    /// Whether the `probe` feature is compiled in.
184
    #[inline(always)]
185
    pub fn enabled() -> bool {
186
        imp::enabled()
187
    }
188
}
189

            
190
/// Nanoseconds elapsed since the first call to this function in the
/// current process (the first call pins the reference instant).
///
/// Intended as the single source of raw monotonic nanos for callers that
/// do not want to go through a span guard; per the original note it
/// mirrors the clock `font::parsed::monotonic_now_nanos` uses for LRU
/// stamping.
#[inline]
pub fn monotonic_now_nanos() -> u64 {
    use std::sync::OnceLock;
    use std::time::Instant;

    static EPOCH: OnceLock<Instant> = OnceLock::new();

    let since_epoch = EPOCH.get_or_init(Instant::now).elapsed();
    since_epoch.as_nanos() as u64
}
201

            
202
/// Format drained probe events as a per-phase timing table to stderr.
203
///
204
/// Groups `EventKind::Span` by name and prints count / total / avg / p99 /
205
/// max in µs. `EventKind::Rss` checkpoints print in wall-clock order with
206
/// deltas so allocator purges are visible.
207
///
208
/// Sorted by total-ns descending so the slowest phase is on top — ideal
209
/// for spotting which phase spiked during a stuttering frame.
210
///
211
/// Called by `AZ_PROFILE=cpu` dumps (both initial layout and relayout),
212
/// and also by external consumers like `servo-shot --azul-trace`.
213
pub fn print_drained_events(label: &str, events: &[Event]) {
214
    use std::collections::BTreeMap;
215

            
216
    if events.is_empty() {
217
        if !Probe::enabled() {
218
            // Feature absent or target-family disabled (WASM): show "???"
219
            // instead of a misleading "compile with feature=probe" hint.
220
            eprintln!(
221
                "[CPU] {label}: probe unavailable on this target (timings = ???)"
222
            );
223
        } else {
224
            eprintln!("[CPU] {label}: no events recorded this pass");
225
        }
226
        return;
227
    }
228

            
229
    let mut spans: BTreeMap<&'static str, Vec<u64>> = BTreeMap::new();
230
    let mut rss_marks: Vec<(&'static str, u64)> = Vec::new();
231
    for ev in events {
232
        match ev.kind {
233
            EventKind::Span { dur_ns } => spans.entry(ev.name).or_default().push(dur_ns),
234
            EventKind::Rss { bytes } => rss_marks.push((ev.name, bytes)),
235
        }
236
    }
237

            
238
    let mut rows: Vec<(&'static str, usize, u64, u64, u64, u64)> = spans
239
        .into_iter()
240
        .map(|(name, mut ns)| {
241
            ns.sort_unstable();
242
            let n = ns.len();
243
            let total: u128 = ns.iter().map(|&x| x as u128).sum();
244
            let avg = (total / n.max(1) as u128) as u64;
245
            let p99 = ns[(n.saturating_sub(1) * 99) / 100];
246
            let max = *ns.last().unwrap();
247
            (name, n, total as u64, avg, p99, max)
248
        })
249
        .collect();
250
    rows.sort_by(|a, b| b.2.cmp(&a.2));
251

            
252
    eprintln!("[CPU] === {label} ({} phases) ===", rows.len());
253
    eprintln!(
254
        "[CPU] {:<28}  {:>5}  {:>10}  {:>9}  {:>9}  {:>9}",
255
        "phase", "n", "total(µs)", "avg(µs)", "p99(µs)", "max(µs)"
256
    );
257
    for (name, n, total, avg, p99, max) in &rows {
258
        eprintln!(
259
            "[CPU] {:<28}  {:>5}  {:>10.1}  {:>9.2}  {:>9.2}  {:>9.2}",
260
            name,
261
            n,
262
            (*total as f64) / 1_000.0,
263
            (*avg as f64) / 1_000.0,
264
            (*p99 as f64) / 1_000.0,
265
            (*max as f64) / 1_000.0,
266
        );
267
    }
268
    if !rss_marks.is_empty() {
269
        eprintln!("[CPU]   -- RSS checkpoints (wall-clock order) --");
270
        let mut prev: Option<u64> = None;
271
        for (lbl, bytes) in &rss_marks {
272
            let delta = prev
273
                .map(|p| {
274
                    let diff = *bytes as i128 - p as i128;
275
                    if diff >= 0 {
276
                        format!("  (Δ +{:.2} MiB)", diff as f64 / 1048576.0)
277
                    } else {
278
                        format!("  (Δ -{:.2} MiB)", -diff as f64 / 1048576.0)
279
                    }
280
                })
281
                .unwrap_or_default();
282
            eprintln!(
283
                "[CPU]   {:<28}  {:.2} MiB{}",
284
                lbl,
285
                *bytes as f64 / 1048576.0,
286
                delta
287
            );
288
            prev = Some(*bytes);
289
        }
290
    }
291
}
292

            
293
/// Convenience wrapper: sample the process's memory and push it into the
/// probe event buffer under `label`.
///
/// The value recorded is the first element returned by
/// `current_rss_bytes()`; when that is 0 the peak value from
/// `peak_rss_bytes_self()` is recorded instead. Current (not peak) RSS is
/// preferred so that allocator purges are visible — peak RSS only moves
/// up. The name is kept as `sample_peak_rss` for backwards compatibility
/// with existing checkpoint labels; semantically it is "sample current".
///
/// Without the `probe` feature this does nothing.
#[inline]
pub fn sample_peak_rss(label: &'static str) {
    #[cfg(feature = "probe")]
    {
        let bytes = match current_rss_bytes() {
            (0, _) => peak_rss_bytes_self(),
            (resident, _) => resident,
        };
        Probe::sample_rss(label, bytes);
    }
    #[cfg(not(feature = "probe"))]
    {
        let _ = label;
    }
}
312

            
313
/// Public entry point for the process's peak RSS in bytes; forwards to
/// the private `peak_rss_bytes_self` reader.
#[cfg(feature = "probe")]
pub fn peak_rss_bytes_pub() -> u64 {
    let peak = peak_rss_bytes_self();
    peak
}
315

            
316
/// Peak resident set size of this process in bytes, read from
/// `getrusage(RUSAGE_SELF).ru_maxrss`.
///
/// On macOS the raw value is used as-is; on other unix targets it is
/// multiplied by 1024 (the field is treated as KiB there). Returns 0 when
/// `getrusage` fails and on non-unix targets.
#[cfg(feature = "probe")]
fn peak_rss_bytes_self() -> u64 {
    #[cfg(unix)]
    {
        // SAFETY: `rusage` is zero-initialised before being handed to
        // `getrusage`, which only writes into the struct we own.
        let usage = unsafe {
            let mut usage: libc::rusage = core::mem::zeroed();
            if libc::getrusage(libc::RUSAGE_SELF, &mut usage) != 0 {
                return 0;
            }
            usage
        };
        let max_rss = usage.ru_maxrss as u64;
        if cfg!(target_os = "macos") {
            max_rss
        } else {
            max_rss.saturating_mul(1024)
        }
    }
    #[cfg(not(unix))]
    {
        0
    }
}
332

            
333
/// Ask the active global allocator to return freed pages to the OS.
///
/// Exactly one strategy is compiled in, chosen in this priority order:
///
/// - With `allocator_mimalloc` feature: calls `mi_collect(true)`, which
///   aggressively returns pages (matches `az_purge_allocator` in azul-dll).
///   When the `probe` feature is also on and
///   `azul_core::profile::memory_enabled` reports true (evaluated once),
///   a `[PURGE]` line with the current RSS is printed to stderr. The
///   trace needs `current_rss_bytes`, which only exists under `probe`,
///   so it is gated on that feature.
/// - Else with `allocator_jemalloc` feature: calls
///   `mallctl("arena.4096.purge")`.
/// - Otherwise on macOS: falls back to `malloc_zone_pressure_relief`
///   which drains the system zone (no-op when a third-party allocator
///   is the global one — hence the explicit feature flags above).
/// - Other platforms with default allocator: no-op.
///
/// Call after major allocations are freed (e.g. after a layout pass).
#[inline]
pub fn hint_purge_allocator() {
    #[cfg(feature = "allocator_mimalloc")]
    {
        // Aggressive purge — returns arenas to the OS when possible.
        unsafe {
            libmimalloc_sys::mi_collect(true);
        }
        #[cfg(feature = "probe")]
        {
            static PURGE_TRACE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
            if *PURGE_TRACE.get_or_init(azul_core::profile::memory_enabled) {
                let (rss, _) = current_rss_bytes();
                eprintln!("[PURGE] mi_collect(true) called — current rss={:.2} MiB", rss as f64 / 1048576.0);
            }
        }
    }
    // Mutually exclusive with the mimalloc branch (which used to return
    // early), so no unreachable code is emitted when both features are on.
    #[cfg(all(feature = "allocator_jemalloc", not(feature = "allocator_mimalloc")))]
    {
        // Purge all arenas. `arena.<i>.purge` with i = MALLCTL_ARENAS_ALL.
        unsafe {
            let _ = tikv_jemalloc_sys::mallctl(
                b"arena.4096.purge\0".as_ptr() as *const _,
                core::ptr::null_mut(),
                core::ptr::null_mut(),
                core::ptr::null_mut(),
                0,
            );
        }
    }
    #[cfg(all(target_os = "macos", not(any(feature = "allocator_mimalloc", feature = "allocator_jemalloc"))))]
    {
        extern "C" {
            fn malloc_zone_pressure_relief(zone: *mut core::ffi::c_void, goal: usize) -> usize;
        }
        // A null zone with goal 0 is what the original code passed.
        unsafe {
            malloc_zone_pressure_relief(core::ptr::null_mut(), 0);
        }
    }
}
383

            
384
/// Sample the process's "real" memory footprint (not peak).
/// Returns `(footprint_bytes, virtual_bytes)`.
///
/// - macOS: the footprint is `phys_footprint` from `TASK_VM_INFO` (via
///   `phys_footprint_bytes`), falling back to `resident_size` from
///   `MACH_TASK_BASIC_INFO` when that reports 0. `phys_footprint` matches
///   Activity Monitor "Memory" and `vmmap`'s "Physical footprint" line,
///   and excludes shared library text pages that would otherwise inflate
///   RSS without costing the process anything uniquely.
/// - Linux: resident and total program size are read from
///   `/proc/self/statm` (page counts) and scaled by the system page size.
/// - Other targets: `(0, 0)`.
///
/// `(0, 0)` is also returned when the platform query fails, so callers
/// can treat a zero footprint as "unavailable". More useful than
/// `getrusage.ru_maxrss`, which only moves upward.
#[cfg(feature = "probe")]
pub fn current_rss_bytes() -> (u64, u64) {
    #[cfg(target_os = "macos")]
    {
        // Prefer phys_footprint (TASK_VM_INFO). Fall back to
        // resident_size (MACH_TASK_BASIC_INFO) if the bigger struct
        // isn't populated for some reason.
        let pf = phys_footprint_bytes();
        #[repr(C)]
        struct MachTaskBasicInfo {
            virtual_size: u64,
            resident_size: u64,
            resident_size_max: u64,
            user_time: [u32; 2],
            system_time: [u32; 2],
            policy: i32,
            suspend_count: i32,
        }
        const MACH_TASK_BASIC_INFO: u32 = 20;
        extern "C" {
            fn mach_task_self() -> u32;
            fn task_info(
                target: u32, flavor: u32,
                info: *mut core::ffi::c_void, count: *mut u32,
            ) -> i32;
        }
        // SAFETY: `info` is zero-initialised and `count` is its size in
        // 32-bit words, which is how `task_info` is told the buffer size.
        unsafe {
            let mut info: MachTaskBasicInfo = core::mem::zeroed();
            let mut count = (core::mem::size_of::<MachTaskBasicInfo>() / 4) as u32;
            let kr = task_info(
                mach_task_self(),
                MACH_TASK_BASIC_INFO,
                &mut info as *mut _ as *mut core::ffi::c_void,
                &mut count,
            );
            if kr == 0 {
                let rss = if pf != 0 { pf } else { info.resident_size };
                (rss, info.virtual_size)
            } else {
                (pf, 0)
            }
        }
    }
    #[cfg(target_os = "linux")]
    {
        // `/proc/self/statm` is one line of page counts whose first two
        // fields are total program size and resident set size.
        let pages = std::fs::read_to_string("/proc/self/statm")
            .ok()
            .and_then(|statm| {
                let mut fields = statm
                    .split_ascii_whitespace()
                    .map(|field| field.parse::<u64>().ok());
                let virtual_pages = fields.next()??;
                let resident_pages = fields.next()??;
                Some((resident_pages, virtual_pages))
            });
        // SAFETY: `sysconf` takes no pointers and has no preconditions.
        let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
        // A negative page size (sysconf failure) fails the conversion and
        // falls through to the "unavailable" result.
        match (pages, u64::try_from(page_size)) {
            (Some((resident_pages, virtual_pages)), Ok(page_bytes)) => (
                resident_pages.saturating_mul(page_bytes),
                virtual_pages.saturating_mul(page_bytes),
            ),
            _ => (0, 0),
        }
    }
    #[cfg(not(any(target_os = "macos", target_os = "linux")))]
    {
        (0, 0)
    }
}
439

            
440
/// Heap bytes currently held by the libc allocator (`mstats.bytes_used`).
///
/// Unlike RSS, this is what *Rust* allocations plus anything else going
/// through the default malloc zone is actually holding — mmap regions
/// for thread stacks, GL buffers, file-mapped fonts, etc. are NOT counted.
/// A leak that shows up here points to a genuine heap retention (an Arc
/// chain never dropped, a Vec never shrunk, a `Box<T>` forgotten).
///
/// Returns 0 on non-macOS.
#[cfg(feature = "probe")]
pub fn malloc_heap_bytes() -> u64 {
    #[cfg(target_os = "macos")]
    {
        /// Mirror of the struct returned by the C `mstats()` call.
        #[repr(C)]
        struct Mstats {
            bytes_total: usize,
            chunks_used: usize,
            bytes_used: usize,
            chunks_free: usize,
            bytes_free: usize,
        }
        extern "C" {
            fn mstats() -> Mstats;
        }
        // SAFETY: `mstats` takes no arguments and returns the struct by
        // value; assumes `Mstats` matches the C layout — TODO confirm.
        let stats = unsafe { mstats() };
        stats.bytes_used as u64
    }
    #[cfg(not(target_os = "macos"))]
    {
        0
    }
}
468

            
469
/// Sample the Mach `phys_footprint` — the memory metric Activity
/// Monitor and `vmmap`'s "Physical footprint" line display. Unlike
/// `resident_size`, this excludes shared library text pages and
/// other kernel-mapped regions that inflate the traditional RSS
/// number without actually costing the process anything. For a
/// short-lived headless render this is a much more honest figure:
/// on a ~20 MiB ru_maxrss run, phys_footprint is typically ~8 MiB.
///
/// Returns 0 on non-macOS or if the Mach call fails.
///
/// This function does not track a peak; keep the max across calls in
/// application code if you need it.
#[cfg(feature = "probe")]
pub fn phys_footprint_bytes() -> u64 {
    #[cfg(target_os = "macos")]
    {
        // `phys_footprint` sits after a long run of counters, so the
        // struct is declared up to (and a little past) that field and
        // read in one `task_info` call. Layout is from
        // osfmk/mach/task_info.h.
        #[repr(C)]
        struct TaskVmInfo {
            virtual_size: u64,
            region_count: u32,
            page_size: u32,
            resident_size: u64,
            resident_size_peak: u64,
            device: u64,
            device_peak: u64,
            internal: u64,
            internal_peak: u64,
            external: u64,
            external_peak: u64,
            reusable: u64,
            reusable_peak: u64,
            purgeable_volatile_pmap: u64,
            purgeable_volatile_resident: u64,
            purgeable_volatile_virtual: u64,
            compressed: u64,
            compressed_peak: u64,
            compressed_lifetime: u64,
            phys_footprint: u64,
            // there are more fields after this, but we don't need them
            _rest: [u64; 12],
        }
        const TASK_VM_INFO: u32 = 22;
        extern "C" {
            fn mach_task_self() -> u32;
            fn task_info(
                target: u32, flavor: u32,
                info: *mut core::ffi::c_void, count: *mut u32,
            ) -> i32;
        }
        // SAFETY: `vm_info` is zero-initialised and `words` is its size in
        // 32-bit units, which is how `task_info` is told the buffer size.
        let (status, vm_info) = unsafe {
            let mut vm_info: TaskVmInfo = core::mem::zeroed();
            let mut words = (core::mem::size_of::<TaskVmInfo>() / 4) as u32;
            let status = task_info(
                mach_task_self(),
                TASK_VM_INFO,
                &mut vm_info as *mut _ as *mut core::ffi::c_void,
                &mut words,
            );
            (status, vm_info)
        };
        match status {
            0 => vm_info.phys_footprint,
            _ => 0,
        }
    }
    #[cfg(not(target_os = "macos"))]
    {
        0
    }
}
535

            
536
/// Background sampler for peak phys_footprint. Spawns a thread that
/// polls `phys_footprint_bytes()` every ~2 ms (500 Hz) and raises a
/// shared atomic high-water mark. Polling is used because this module has
/// no direct "peak phys_footprint" reading, so it is the only way here to
/// catch mid-phase transients that are MADV_FREE'd before the next
/// explicit sample point.
///
/// Not started by default; call `start_peak_sampler()` once at
/// process init if you want peak tracking. The call is idempotent: only
/// the first successful call spawns the thread. If the thread cannot be
/// spawned the started flag is cleared again so a later call can retry.
/// `peak_phys_footprint_seen()` reads the current high-water mark.
///
/// Does nothing on non-macOS targets.
#[cfg(feature = "probe")]
pub fn start_peak_sampler() {
    #[cfg(target_os = "macos")]
    {
        use std::sync::atomic::{AtomicBool, Ordering};
        use std::time::Duration;

        /// Poll period: 2 ms, i.e. the documented 500 Hz sampling rate.
        const POLL_INTERVAL: Duration = Duration::from_millis(2);

        // Idempotent — only spawns once.
        static STARTED: AtomicBool = AtomicBool::new(false);
        if STARTED.swap(true, Ordering::AcqRel) {
            return;
        }

        let spawned = std::thread::Builder::new()
            .name("azul-peak-sampler".to_string())
            .spawn(|| loop {
                // `fetch_max` raises the mark atomically, so a value stored
                // concurrently by `reset_peak` is never overwritten by a
                // smaller, stale sample.
                PEAK_PHYS_FOOTPRINT.fetch_max(phys_footprint_bytes(), Ordering::Relaxed);
                std::thread::sleep(POLL_INTERVAL);
            });
        if spawned.is_err() {
            // No sampler is running; don't latch the flag forever.
            STARTED.store(false, Ordering::Release);
        }
    }
}
571

            
572
/// Process-wide high-water mark of `phys_footprint`, in bytes. Written by
/// the background sampler and `reset_peak`, read by
/// `peak_phys_footprint_seen` and `sample_phase_peak`. Starts at 0.
#[cfg(feature = "probe")]
static PEAK_PHYS_FOOTPRINT: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
575

            
576
/// Read the peak `phys_footprint` (bytes) currently stored in the global
/// high-water mark. The mark starts at 0 and stays there until the
/// background sampler or `reset_peak` stores a value.
#[cfg(feature = "probe")]
pub fn peak_phys_footprint_seen() -> u64 {
    use std::sync::atomic::Ordering;

    PEAK_PHYS_FOOTPRINT.load(Ordering::Relaxed)
}
582

            
583
/// Reset the global peak high-water mark to the current phys_footprint.
///
/// Paired with `peak_phys_footprint_seen()` so a caller can record
/// "peak during phase X": call `reset_peak()` at phase entry, then
/// `peak_phys_footprint_seen()` at phase exit. The background sampler, if
/// started, keeps running either way.
#[cfg(feature = "probe")]
pub fn reset_peak() {
    use std::sync::atomic::Ordering;

    PEAK_PHYS_FOOTPRINT.store(phys_footprint_bytes(), Ordering::Relaxed);
}
593

            
594
/// Record a phase's peak footprint into the probe event stream.
///
/// Call at phase exit after `reset_peak()` at phase entry. Emits an
/// RSS-kind event under `label` whose byte count is the current value of
/// the global high-water mark.
#[cfg(feature = "probe")]
#[inline]
pub fn sample_phase_peak(label: &'static str) {
    use std::sync::atomic::Ordering;

    let high_water = PEAK_PHYS_FOOTPRINT.load(Ordering::Relaxed);
    Probe::sample_rss(label, high_water);
}
603

            
604
/// No-op stand-in for `reset_peak` when the `probe` feature is off.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn reset_peak() {
    // Intentionally empty: there is no high-water mark without `probe`.
}
607

            
608
/// No-op stand-in for `sample_phase_peak` when the `probe` feature is
/// off; the label is ignored.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn sample_phase_peak(_label: &'static str) {
    // Intentionally empty: nothing is recorded without `probe`.
}
611

            
612
/// Stand-in for `malloc_heap_bytes` when the `probe` feature is off:
/// always reports 0 bytes.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn malloc_heap_bytes() -> u64 {
    0
}
615

            
616
/// Escape `raw` so it can be embedded between double quotes in a JSON
/// document: `"` and `\` are backslash-escaped and control characters
/// below U+0020 are written as short escapes or `\u00XX`. Strings that
/// need no escaping are returned borrowed.
#[cfg(feature = "probe")]
fn json_escape_label(raw: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;
    use std::fmt::Write as _;

    let needs_escape = |c: char| c == '"' || c == '\\' || (c as u32) < 0x20;
    if !raw.chars().any(needs_escape) {
        return Cow::Borrowed(raw);
    }
    let mut escaped = String::with_capacity(raw.len() + 8);
    for c in raw.chars() {
        match c {
            '"' => escaped.push_str("\\\""),
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            c if (c as u32) < 0x20 => {
                // Writing into a `String` cannot fail.
                let _ = write!(escaped, "\\u{:04x}", c as u32);
            }
            c => escaped.push(c),
        }
    }
    Cow::Owned(escaped)
}

/// Append `record` plus a trailing newline to the file at `path`,
/// creating the file if needed. The whole line goes out in a single
/// `write_all` so one record is not split across several writes.
/// Best-effort: open and write errors are ignored.
#[cfg(feature = "probe")]
fn append_jsonl_record<P: AsRef<std::path::Path>>(path: P, mut record: String) {
    use std::io::Write;

    record.push('\n');
    if let Ok(mut file) = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(path)
    {
        let _ = file.write_all(record.as_bytes());
    }
}

/// Emit one `{"ev":"phase","call":C,"label":L,"heap":N}` line to the
/// JSONL file named by `AZ_PROFILE_OUT=<path>`. Only fires when
/// `AZ_PROFILE=heap,jsonl` is set *and* the path is given; otherwise it
/// returns before doing any I/O.
///
/// The `call` id is bumped on every `"start"` label; every other label
/// reuses the id of the most recent `"start"`, so downstream analyzers
/// can group phases belonging to a single `regenerate_layout` invocation.
///
/// `label` convention: `start` at function entry; `<step>` after each
/// phase completes; `end` at function exit. Heap Δ between adjacent
/// labels within the same call-id is the bytes retained by that phase.
/// The label is JSON-escaped before being written, so any string yields
/// a well-formed record.
///
/// No-op stub when the `probe` feature is off.
#[cfg(feature = "probe")]
pub fn emit_phase_heap(label: &str) {
    use std::sync::atomic::{AtomicU64, Ordering};

    if !heap_jsonl_enabled() { return; }
    let Some(p) = azul_core::profile::out_path() else { return };

    static CALL_ID: AtomicU64 = AtomicU64::new(0);
    // Auto-increment on every "start" label; "end" and intermediates reuse
    // the current id so all phases in one regenerate_layout invocation share
    // a call number.
    static CURRENT_CALL: AtomicU64 = AtomicU64::new(0);
    let call_id = if label == "start" {
        let next = CALL_ID.fetch_add(1, Ordering::Relaxed) + 1;
        CURRENT_CALL.store(next, Ordering::Relaxed);
        next
    } else {
        CURRENT_CALL.load(Ordering::Relaxed)
    };

    let heap = malloc_heap_bytes();
    append_jsonl_record(
        p,
        format!(
            r#"{{"ev":"phase","call":{},"label":"{}","heap":{}}}"#,
            call_id,
            json_escape_label(label),
            heap
        ),
    );
}

/// No-op stand-in for `emit_phase_heap` when the `probe` feature is off.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn emit_phase_heap(_label: &str) {}

/// Like [`emit_phase_heap`] but attaches a numeric payload (e.g., a cache
/// size) to the JSONL record under the `"extra"` field.
///
/// Gated behind `AZ_PROFILE=heap,jsonl,detail` — the `detail` token opts
/// in to fine-grained probes that produce extra per-step records (one
/// per intermediate step inside a phase). Without `detail`, only the
/// coarser phase probes from [`emit_phase_heap`] fire.
///
/// NOTE(review): these records always carry `"call":0` rather than the
/// call id tracked by [`emit_phase_heap`] — confirm that is intended.
#[cfg(feature = "probe")]
pub fn emit_phase_heap_extra(label: &str, extra: u64) {
    if !heap_jsonl_enabled() { return; }
    if !azul_core::profile::detail_enabled() { return; }
    let Some(p) = azul_core::profile::out_path() else { return };

    let heap = malloc_heap_bytes();
    append_jsonl_record(
        p,
        format!(
            r#"{{"ev":"phase","call":0,"label":"{}","heap":{},"extra":{}}}"#,
            json_escape_label(label),
            heap,
            extra
        ),
    );
}

/// No-op stand-in for `emit_phase_heap_extra` when the `probe` feature
/// is off.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn emit_phase_heap_extra(_label: &str, _extra: u64) {}
697

            
698
/// True only when both the `heap` and `jsonl` tokens are active in
/// `AZ_PROFILE` — the combination that enables JSONL heap-probe
/// emission. Either token alone leaves emission off.
#[cfg(feature = "probe")]
#[inline]
fn heap_jsonl_enabled() -> bool {
    let profile = azul_core::profile::flags();
    if !profile.heap {
        return false;
    }
    profile.jsonl
}
706

            
707
/// Returns true iff `AZ_PROFILE=detail` is active, as reported by
/// `azul_core::profile::detail_enabled`. Exposed here so downstream
/// crates can write `azul_layout::probe::detail_enabled()` without
/// pulling in `azul_core::profile` directly.
#[cfg(feature = "probe")]
#[inline]
pub fn detail_enabled() -> bool {
    use azul_core::profile;

    profile::detail_enabled()
}
715

            
716
/// Stand-in for `detail_enabled` when the `probe` feature is off:
/// detail probes are never enabled.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn detail_enabled() -> bool {
    false
}