Grcov report - probe.rs

1

//! Optional fine-grained timing + RSS instrumentation.

2

//!

3

//! Behind the `probe` feature flag every [`Probe::span`] returns a guard

4

//! that records the elapsed wall-clock on `Drop`, and

5

//! [`Probe::sample_rss`] records a labelled RSS checkpoint. Events are

6

//! buffered in a per-thread [`Vec`] and drained by the consumer with

7

//! [`Probe::drain`].

8

//!

9

//! With the feature off every method is a `#[inline(always)]` no-op so

10

//! release builds without the feature pay zero cost.

11

//!

12

//! Consumer (e.g. servo-shot) groups drained events by name to produce

13

//! the per-phase averages / p99s in its trace report.

14

15

use core::marker::PhantomData;

16

17

// WASM gate: `Instant::now()` panics on browser WASM (no monotonic clock)

18

// and `libc::getrusage` isn't available, so on `target_family = "wasm"`

19

// we drop to the no-op stubs even when the `probe` feature is on.

20

// `AZ_PROFILE=cpu` then prints "(probe unavailable on this target)"

21

// rather than crashing.

22

23

#[cfg(all(feature = "probe", not(target_family = "wasm")))]
mod imp {
    use std::cell::RefCell;
    use std::time::Instant;

    thread_local! {
        // One buffer per thread: spans and RSS samples append to it, `drain`
        // and `drop_events` empty it.
        static EVENTS: RefCell<Vec<super::Event>> = const { RefCell::new(Vec::new()) };
    }

    /// Timing guard. Remembers when it was opened and, when dropped, pushes
    /// a `Span` event carrying its name and the elapsed nanoseconds.
    pub struct Span {
        pub(crate) name: &'static str,
        pub(crate) start: Instant,
    }

    /// Appends one event to the current thread's buffer.
    ///
    /// Uses `try_with` (not `with`): the lifted-to-wasm web backend has no
    /// real TLS, so `with` hits `panic_access_error`. The probe accesses are
    /// inlined into layout_dom_recursive/layout_document, so they can't be
    /// stubbed at the symbol level. When the access fails the event is
    /// silently discarded.
    #[inline]
    fn record(name: &'static str, kind: super::EventKind) {
        let _ = EVENTS.try_with(|buffer| buffer.borrow_mut().push(super::Event { name, kind }));
    }

    impl Drop for Span {
        fn drop(&mut self) {
            // Measure first so the buffer access is not part of the timing.
            let dur_ns = self.start.elapsed().as_nanos() as u64;
            record(self.name, super::EventKind::Span { dur_ns });
        }
    }

    /// Starts timing a span named `name`.
    pub(super) fn open(name: &'static str) -> Span {
        let start = Instant::now();
        Span { name, start }
    }

    /// Buffers an RSS checkpoint of `bytes` under `label`.
    pub(super) fn sample_rss(label: &'static str, bytes: u64) {
        record(label, super::EventKind::Rss { bytes });
    }

    /// Moves every buffered event out of this thread's buffer, leaving it
    /// empty. Returns an empty `Vec` when the thread-local is inaccessible.
    pub(super) fn drain() -> Vec<super::Event> {
        match EVENTS.try_with(|buffer| core::mem::take(&mut *buffer.borrow_mut())) {
            Ok(events) => events,
            Err(_) => Vec::new(),
        }
    }

    /// Clears this thread's buffer without handing the events back.
    pub(super) fn drop_events() {
        EVENTS.try_with(|buffer| buffer.borrow_mut().clear()).ok();
    }

    /// Number of events currently buffered on this thread (0 when the
    /// thread-local is inaccessible).
    pub(super) fn peek_len() -> usize {
        EVENTS
            .try_with(|buffer| buffer.borrow().len())
            .unwrap_or_default()
    }

    /// This is the recording implementation, so the probe counts as enabled.
    pub(super) fn enabled() -> bool {
        true
    }
}

86

87

#[cfg(any(not(feature = "probe"), target_family = "wasm"))]

88

mod imp {

89

    pub struct Span;

90

91

    impl Drop for Span {

92

        #[inline(always)]

93

246540

        fn drop(&mut self) {}

94

95

96

    #[inline(always)]

97

246540

    pub(super) fn open(_name: &'static str) -> Span {

98

246540

        Span

99

246540

100

101

    #[inline(always)]

102

    pub(super) fn sample_rss(_label: &'static str, _bytes: u64) {}

103

104

    #[inline(always)]

105

    pub(super) fn drain() -> Vec<super::Event> {

106

        Vec::new()

107

108

109

    #[inline(always)]

110

    pub(super) fn drop_events() {}

111

112

    #[inline(always)]

113

    pub(super) fn peek_len() -> usize { 0 }

114

115

    #[inline(always)]

116

    pub(super) fn enabled() -> bool {

117

        false

118

119

120

121

/// Drained probe event. `Vec<Event>` is what consumers walk to render
/// trace summaries; the order is the order events fired in.
///
/// Comparable with `==` so consumers and tests can match events directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Event {
    /// Span name or RSS checkpoint label supplied by the call site.
    pub name: &'static str,
    /// What was measured.
    pub kind: EventKind,
}

/// Payload of an [`Event`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EventKind {
    /// A timed scope's wall-clock duration, in nanoseconds.
    Span { dur_ns: u64 },
    /// A labelled RSS checkpoint, in bytes.
    Rss { bytes: u64 },
}

136

137

/// Re-exported guard. Held by the caller of [`Probe::span`].

138

pub use imp::Span;

139

140

/// Namespace for the probe API; every entry point is an associated function.
/// All of them are no-ops without the `probe` feature.
pub struct Probe {
    // Private field: code outside this module cannot build a `Probe` value.
    _no_construct: PhantomData<()>,
}

144

145

impl Probe {

146

    /// Open a timed span. The returned guard records its name + nanos

147

    /// on drop into the thread-local event buffer.

148

    #[inline(always)]

149

246540

    pub fn span(name: &'static str) -> Span {

150

246540

        imp::open(name)

151

246540

152

153

    /// Record an RSS checkpoint with the given label + byte count. The

154

    /// caller supplies the bytes (this module does not depend on

155

    /// platform RSS readers) so consumers can use whatever measurement

156

    /// helper they own.

157

    #[inline(always)]

158

    pub fn sample_rss(label: &'static str, bytes: u64) {

159

        imp::sample_rss(label, bytes);

160

161

162

    /// Drain the per-thread event buffer.

163

    #[inline(always)]

164

    pub fn drain() -> Vec<Event> {

165

        imp::drain()

166

167

168

    /// Discard the per-thread event buffer without allocating a `Vec` to

169

    /// hand back. Used by long-running harnesses (e.g. `AZ_E2E_TEST`) that

170

    /// want to prevent the thread-local buffer from inflating RSS during

171

    /// thousands of layout passes without actually needing the events.

172

    #[inline(always)]

173

    pub fn drop_events() {

174

        imp::drop_events();

175

176

177

    /// Current number of events in the per-thread buffer. Cheap to call.

178

    #[inline(always)]

179

    pub fn peek_len() -> usize {

180

        imp::peek_len()

181

182

183

    /// Whether the `probe` feature is compiled in.

184

    #[inline(always)]

185

    pub fn enabled() -> bool {

186

        imp::enabled()

187

188

189

190

/// Nanoseconds elapsed since the first call to this function in the
/// current process.
///
/// Described by its author as the same monotonic clock that
/// `font::parsed::monotonic_now_nanos` uses for LRU stamping; it is exposed
/// here so callers that want raw nanos without a span guard have one source
/// of truth.
///
/// The anchor is captured lazily on the first call, so that call returns a
/// value close to zero. The result saturates at `u64::MAX` instead of
/// wrapping if the elapsed time ever exceeds what fits in a `u64`.
///
/// NOTE(review): this is not gated like the probe implementation, yet the
/// comment above that gate says `Instant::now()` panics on browser WASM.
/// Confirm no WASM path reaches this function.
#[inline]
pub fn monotonic_now_nanos() -> u64 {
    use std::sync::OnceLock;
    use std::time::Instant;

    static LAUNCH: OnceLock<Instant> = OnceLock::new();

    let elapsed = LAUNCH.get_or_init(Instant::now).elapsed();
    u64::try_from(elapsed.as_nanos()).unwrap_or(u64::MAX)
}

201

202

/// Format drained probe events as a per-phase timing table to stderr.

203

///

204

/// Groups `EventKind::Span` by name and prints count / total / avg / p99 /

205

/// max in µs. `EventKind::Rss` checkpoints print in wall-clock order with

206

/// deltas so allocator purges are visible.

207

///

208

/// Sorted by total-ns descending so the slowest phase is on top — ideal

209

/// for spotting which phase spiked during a stuttering frame.

210

///

211

/// Called by `AZ_PROFILE=cpu` dumps (both initial layout and relayout),

212

/// and also by external consumers like `servo-shot --azul-trace`.

213

pub fn print_drained_events(label: &str, events: &[Event]) {

214

    use std::collections::BTreeMap;

215

216

    if events.is_empty() {

217

        if !Probe::enabled() {

218

            // Feature absent or target-family disabled (WASM): show "???"

219

            // instead of a misleading "compile with feature=probe" hint.

220

            eprintln!(

221

                "[CPU] {label}: probe unavailable on this target (timings = ???)"

222

);

223

        } else {

224

            eprintln!("[CPU] {label}: no events recorded this pass");

225

226

        return;

227

228

229

    let mut spans: BTreeMap<&'static str, Vec<u64>> = BTreeMap::new();

230

    let mut rss_marks: Vec<(&'static str, u64)> = Vec::new();

231

    for ev in events {

232

        match ev.kind {

233

            EventKind::Span { dur_ns } => spans.entry(ev.name).or_default().push(dur_ns),

234

            EventKind::Rss { bytes } => rss_marks.push((ev.name, bytes)),

235

236

237

238

    let mut rows: Vec<(&'static str, usize, u64, u64, u64, u64)> = spans

239

        .into_iter()

240

        .map(|(name, mut ns)| {

241

            ns.sort_unstable();

242

            let n = ns.len();

243

            let total: u128 = ns.iter().map(|&x| x as u128).sum();

244

            let avg = (total / n.max(1) as u128) as u64;

245

            let p99 = ns[(n.saturating_sub(1) * 99) / 100];

246

            let max = *ns.last().unwrap();

247

            (name, n, total as u64, avg, p99, max)

248

})

249

        .collect();

250

    rows.sort_by(|a, b| b.2.cmp(&a.2));

251

252

    eprintln!("[CPU] === {label} ({} phases) ===", rows.len());

253

    eprintln!(

254

        "[CPU] {:<28}  {:>5}  {:>10}  {:>9}  {:>9}  {:>9}",

255

        "phase", "n", "total(µs)", "avg(µs)", "p99(µs)", "max(µs)"

256

);

257

    for (name, n, total, avg, p99, max) in &rows {

258

        eprintln!(

259

            "[CPU] {:<28}  {:>5}  {:>10.1}  {:>9.2}  {:>9.2}  {:>9.2}",

260

            name,

261

n,

262

            (*total as f64) / 1_000.0,

263

            (*avg as f64) / 1_000.0,

264

            (*p99 as f64) / 1_000.0,

265

            (*max as f64) / 1_000.0,

266

);

267

268

    if !rss_marks.is_empty() {

269

        eprintln!("[CPU]   -- RSS checkpoints (wall-clock order) --");

270

        let mut prev: Option<u64> = None;

271

        for (lbl, bytes) in &rss_marks {

272

            let delta = prev

273

                .map(|p| {

274

                    let diff = *bytes as i128 - p as i128;

275

                    if diff >= 0 {

276

                        format!("  (Δ +{:.2} MiB)", diff as f64 / 1048576.0)

277

                    } else {

278

                        format!("  (Δ -{:.2} MiB)", -diff as f64 / 1048576.0)

279

280

})

281

                .unwrap_or_default();

282

            eprintln!(

283

                "[CPU]   {:<28}  {:.2} MiB{}",

284

                lbl,

285

                *bytes as f64 / 1048576.0,

286

                delta

287

);

288

            prev = Some(*bytes);

289

290

291

292

293

/// Convenience wrapper: take a memory reading for this process and push it
/// into the probe event buffer under `label`.
///
/// The reading is the first value of `current_rss_bytes()` (a *current*
/// figure, so allocator purges show up as drops). Only when that reports 0
/// does this fall back to the `getrusage` peak, which can only move up.
///
/// The name `sample_peak_rss` is kept for backwards compatibility with
/// existing checkpoint labels; semantically it is "sample current".
///
/// Without the `probe` feature this does nothing.
#[inline]
pub fn sample_peak_rss(label: &'static str) {
    #[cfg(feature = "probe")]
    {
        let (resident, _virtual) = current_rss_bytes();
        let bytes = match resident {
            0 => peak_rss_bytes_self(),
            live => live,
        };
        Probe::sample_rss(label, bytes);
    }
    #[cfg(not(feature = "probe"))]
    let _ = label;
}

312

313

/// Public entry point for the private `peak_rss_bytes_self` reader; returns
/// whatever that function returns.
#[cfg(feature = "probe")]
pub fn peak_rss_bytes_pub() -> u64 {
    peak_rss_bytes_self()
}

315

316

/// Peak resident set size of this process, in bytes, from
/// `getrusage(RUSAGE_SELF)`.
///
/// `ru_maxrss` is used as-is on macOS and multiplied by 1024 everywhere
/// else (saturating). Returns 0 when `getrusage` fails and on non-unix
/// targets.
#[cfg(feature = "probe")]
fn peak_rss_bytes_self() -> u64 {
    #[cfg(unix)]
    {
        // SAFETY: `rusage` is zero-initialised before the call and
        // `getrusage` receives a valid pointer to it for the duration of
        // the call.
        let (status, usage) = unsafe {
            let mut usage: libc::rusage = core::mem::zeroed();
            let status = libc::getrusage(libc::RUSAGE_SELF, &mut usage);
            (status, usage)
        };
        if status != 0 {
            return 0;
        }
        let raw = usage.ru_maxrss as u64;
        if cfg!(target_os = "macos") {
            raw
        } else {
            raw.saturating_mul(1024)
        }
    }
    #[cfg(not(unix))]
    {
        0
    }
}

332

333

/// Ask the active global allocator to return freed pages to the OS.
///
/// - With `allocator_mimalloc` feature: calls `mi_collect(true)`, which
///   aggressively returns pages (matches `az_purge_allocator` in azul-dll).
///   When the `probe` feature is also on and memory profiling is enabled,
///   one `[PURGE]` line with the current RSS is written to stderr.
/// - With `allocator_jemalloc` feature: calls `mallctl("arena.4096.purge")`,
///   i.e. purge with the arena index set to `MALLCTL_ARENAS_ALL`. The
///   return code is ignored (best effort).
/// - Otherwise on macOS: falls back to `malloc_zone_pressure_relief`
///   which drains the system zone (no-op when a third-party allocator
///   is the global one — hence the explicit feature flags above).
/// - Other platforms with default allocator: no-op.
///
/// Call after major allocations are freed (e.g. after a layout pass).
#[inline]
pub fn hint_purge_allocator() {
    #[cfg(feature = "allocator_mimalloc")]
    {
        // Aggressive purge — returns arenas to the OS when possible.
        // SAFETY: `mi_collect` takes only a flag and no pointers.
        unsafe {
            libmimalloc_sys::mi_collect(true);
        }
        // `current_rss_bytes` is only compiled with the `probe` feature, so
        // the trace line must be gated the same way or this branch would
        // not build with `allocator_mimalloc` alone.
        #[cfg(feature = "probe")]
        {
            static PURGE_TRACE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
            if *PURGE_TRACE.get_or_init(azul_core::profile::memory_enabled) {
                let (rss, _) = current_rss_bytes();
                eprintln!(
                    "[PURGE] mi_collect(true) called — current rss={:.2} MiB",
                    rss as f64 / 1048576.0
                );
            }
        }
        return;
    }

    #[cfg(feature = "allocator_jemalloc")]
    {
        // Purge all arenas. `arena.<i>.purge` with i = MALLCTL_ARENAS_ALL.
        // SAFETY: the name is a NUL-terminated byte string; the old/new
        // value pointers are null with a zero length, i.e. no value is
        // read or written.
        unsafe {
            let _ = tikv_jemalloc_sys::mallctl(
                b"arena.4096.purge\0".as_ptr() as *const _,
                core::ptr::null_mut(),
                core::ptr::null_mut(),
                core::ptr::null_mut(),
                0,
            );
        }
        return;
    }

    #[cfg(all(
        target_os = "macos",
        not(any(feature = "allocator_mimalloc", feature = "allocator_jemalloc"))
    ))]
    {
        extern "C" {
            fn malloc_zone_pressure_relief(zone: *mut core::ffi::c_void, goal: usize) -> usize;
        }
        // SAFETY: called with a null zone and a goal of 0, so no
        // caller-owned memory is handed to the allocator.
        unsafe {
            malloc_zone_pressure_relief(core::ptr::null_mut(), 0);
        }
    }
}

383

384

/// Sample the process's "real" memory footprint (not peak).
/// Returns `(footprint_bytes, virtual_bytes)`.
///
/// - macOS: the footprint is `phys_footprint` from `TASK_VM_INFO` — matches
///   Activity Monitor "Memory" and `vmmap`'s "Physical footprint" line, and
///   excludes shared library text pages that would otherwise inflate RSS
///   without costing the process anything uniquely. If that reads as 0 the
///   `resident_size` from `MACH_TASK_BASIC_INFO` is used instead. The
///   virtual size comes from `MACH_TASK_BASIC_INFO`, or 0 if that call fails.
/// - Linux: resident and total program size from `/proc/self/statm`
///   (reported in pages, scaled by the system page size).
/// - Any other target, or any read/parse failure on Linux: `(0, 0)`.
///
/// More useful than `getrusage.ru_maxrss` which only moves upward.
#[cfg(feature = "probe")]
pub fn current_rss_bytes() -> (u64, u64) {
    #[cfg(target_os = "macos")]
    {
        // Prefer phys_footprint (TASK_VM_INFO). Fall back to
        // resident_size (MACH_TASK_BASIC_INFO) if the bigger struct
        // isn't populated for some reason.
        let pf = phys_footprint_bytes();
        #[repr(C)]
        struct MachTaskBasicInfo {
            virtual_size: u64,
            resident_size: u64,
            resident_size_max: u64,
            user_time: [u32; 2],
            system_time: [u32; 2],
            policy: i32,
            suspend_count: i32,
        }
        const MACH_TASK_BASIC_INFO: u32 = 20;
        extern "C" {
            fn mach_task_self() -> u32;
            fn task_info(
                target: u32, flavor: u32,
                info: *mut core::ffi::c_void, count: *mut u32,
            ) -> i32;
        }
        // SAFETY: `info` is a zero-initialised `repr(C)` struct that outlives
        // the call, and `count` holds its size in 32-bit words.
        unsafe {
            let mut info: MachTaskBasicInfo = core::mem::zeroed();
            let mut count = (core::mem::size_of::<MachTaskBasicInfo>() / 4) as u32;
            let kr = task_info(
                mach_task_self(),
                MACH_TASK_BASIC_INFO,
                &mut info as *mut _ as *mut core::ffi::c_void,
                &mut count,
            );
            if kr == 0 {
                let rss = if pf != 0 { pf } else { info.resident_size };
                (rss, info.virtual_size)
            } else {
                (pf, 0)
            }
        }
    }
    #[cfg(target_os = "linux")]
    {
        // statm fields are "size resident shared text lib data dt", all
        // counted in pages; only the first two are needed.
        let read_statm = || -> Option<(u64, u64)> {
            let statm = std::fs::read_to_string("/proc/self/statm").ok()?;
            let mut fields = statm.split_ascii_whitespace();
            let virtual_pages: u64 = fields.next()?.parse().ok()?;
            let resident_pages: u64 = fields.next()?.parse().ok()?;
            // SAFETY: `sysconf` takes an integer selector and no pointers.
            let raw_page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
            // `sysconf` reports failure as -1; treat anything non-positive
            // as "unknown" rather than producing a bogus byte count.
            let page_size = u64::try_from(raw_page_size).ok().filter(|&size| size > 0)?;
            Some((
                resident_pages.saturating_mul(page_size),
                virtual_pages.saturating_mul(page_size),
            ))
        };
        read_statm().unwrap_or((0, 0))
    }
    #[cfg(not(any(target_os = "macos", target_os = "linux")))]
    {
        (0, 0)
    }
}

439

440

/// Heap bytes currently held by the libc allocator (`mstats.bytes_used`).
///
/// Unlike RSS, this is what *Rust* allocations plus anything else going
/// through the default malloc zone is actually holding — mmap regions
/// for thread stacks, GL buffers, file-mapped fonts, etc. are NOT counted.
/// A leak that shows up here points to a genuine heap retention (an Arc
/// chain never dropped, a Vec never shrunk, a `Box<T>` forgotten).
///
/// Returns 0 on non-macOS.
#[cfg(feature = "probe")]
pub fn malloc_heap_bytes() -> u64 {
    #[cfg(target_os = "macos")]
    {
        // Mirror of the struct `mstats()` returns by value.
        #[repr(C)]
        struct Mstats {
            bytes_total: usize,
            chunks_used: usize,
            bytes_used: usize,
            chunks_free: usize,
            bytes_free: usize,
        }
        extern "C" {
            fn mstats() -> Mstats;
        }
        // SAFETY: `mstats` takes no arguments and returns the struct by value.
        let snapshot = unsafe { mstats() };
        snapshot.bytes_used as u64
    }
    #[cfg(not(target_os = "macos"))]
    {
        0
    }
}

468

469

/// Sample the Mach `phys_footprint` — the memory metric Activity
/// Monitor and `vmmap`'s "Physical footprint" line display. Unlike
/// `resident_size`, this excludes shared library text pages and
/// other kernel-mapped regions that inflate the traditional RSS
/// number without actually costing the process anything. For a
/// short-lived headless render this is a much more honest figure:
/// on a ~20 MiB ru_maxrss run, phys_footprint is typically ~8 MiB.
///
/// Returns 0 on non-macOS or if the Mach call fails.
///
/// There's no direct "peak phys_footprint" field; track the max
/// across calls in application code if you need it.
#[cfg(feature = "probe")]
pub fn phys_footprint_bytes() -> u64 {
    #[cfg(target_os = "macos")]
    {
        // TASK_VM_INFO = 22. `phys_footprint` sits after a long run of
        // other counters, so the struct is declared up to that field and
        // padded. Layout is from osfmk/mach/task_info.h.
        #[repr(C)]
        struct TaskVmInfo {
            virtual_size: u64,
            region_count: u32,
            page_size: u32,
            resident_size: u64,
            resident_size_peak: u64,
            device: u64,
            device_peak: u64,
            internal: u64,
            internal_peak: u64,
            external: u64,
            external_peak: u64,
            reusable: u64,
            reusable_peak: u64,
            purgeable_volatile_pmap: u64,
            purgeable_volatile_resident: u64,
            purgeable_volatile_virtual: u64,
            compressed: u64,
            compressed_peak: u64,
            compressed_lifetime: u64,
            phys_footprint: u64,
            // there are more fields after this, but we don't need them
            _rest: [u64; 12],
        }
        const TASK_VM_INFO: u32 = 22;
        extern "C" {
            fn mach_task_self() -> u32;
            fn task_info(
                target: u32, flavor: u32,
                info: *mut core::ffi::c_void, count: *mut u32,
            ) -> i32;
        }
        // SAFETY: `vm_info` is a zero-initialised `repr(C)` struct that
        // outlives the call, and `word_count` holds its size in 32-bit words.
        unsafe {
            let mut vm_info: TaskVmInfo = core::mem::zeroed();
            let mut word_count = (core::mem::size_of::<TaskVmInfo>() / 4) as u32;
            let status = task_info(
                mach_task_self(),
                TASK_VM_INFO,
                &mut vm_info as *mut _ as *mut core::ffi::c_void,
                &mut word_count,
            );
            match status {
                0 => vm_info.phys_footprint,
                _ => 0,
            }
        }
    }
    #[cfg(not(target_os = "macos"))]
    {
        0
    }
}

535

536

/// Background sampler for peak phys_footprint. Spawns a thread that
/// polls `phys_footprint_bytes()` every ~2 ms and raises a shared
/// atomic high-water mark. The kernel does not expose a direct
/// "peak phys_footprint" — unlike `resident_size_peak` in TASK_VM_INFO —
/// so polling is the only way to catch mid-phase transients that are
/// MADV_FREE'd before the next explicit sample point.
///
/// Not started by default; call `start_peak_sampler()` once at
/// process init if you want peak tracking. Overhead is negligible
/// (~1-5 µs per poll on macOS, 500 Hz → <0.25% CPU of one core).
/// `peak_phys_footprint_seen()` reads the current high-water mark.
///
/// Idempotent: only the first call spawns the thread. If that spawn fails
/// the error is dropped and later calls do not retry. On targets other
/// than macOS this function does nothing.
#[cfg(feature = "probe")]
pub fn start_peak_sampler() {
    #[cfg(target_os = "macos")]
    {
        use std::sync::atomic::{AtomicBool, Ordering};
        use std::time::Duration;

        // 2 ms between polls = the documented 500 Hz sampling rate.
        const POLL_INTERVAL: Duration = Duration::from_millis(2);

        // Idempotent — only spawns once.
        static STARTED: AtomicBool = AtomicBool::new(false);
        if STARTED.swap(true, Ordering::AcqRel) {
            return;
        }

        std::thread::Builder::new()
            .name("azul-peak-sampler".to_string())
            .spawn(|| loop {
                // Single atomic max: a concurrent store (e.g. from
                // `reset_peak`) cannot slip in between a separate load
                // and store.
                PEAK_PHYS_FOOTPRINT.fetch_max(phys_footprint_bytes(), Ordering::Relaxed);
                std::thread::sleep(POLL_INTERVAL);
            })
            .ok();
    }
}

571

572

/// High-water mark of `phys_footprint`, in bytes. Starts at 0; written by
/// the background sampler and by `reset_peak`, read by
/// `peak_phys_footprint_seen` and `sample_phase_peak`.
#[cfg(feature = "probe")]
static PEAK_PHYS_FOOTPRINT: std::sync::atomic::AtomicU64 =
    std::sync::atomic::AtomicU64::new(0);

575

576

/// Read the current `phys_footprint` high-water mark, in bytes.
///
/// Returns 0 if nothing has written the mark yet, i.e. neither the
/// background sampler nor `reset_peak` has stored a non-zero value.
#[cfg(feature = "probe")]
pub fn peak_phys_footprint_seen() -> u64 {
    use std::sync::atomic::Ordering;

    PEAK_PHYS_FOOTPRINT.load(Ordering::Relaxed)
}

582

583

/// Overwrite the global high-water mark with the current phys_footprint.
///
/// Paired with `peak_phys_footprint_seen()` so a caller can record
/// "peak during phase X" — call `reset_peak()` at phase entry, then
/// `peak_phys_footprint_seen()` at phase exit. The 500 Hz background
/// sampler runs continuously either way.
#[cfg(feature = "probe")]
pub fn reset_peak() {
    use std::sync::atomic::Ordering;

    PEAK_PHYS_FOOTPRINT.store(phys_footprint_bytes(), Ordering::Relaxed);
}

593

594

/// Push the current high-water mark into the probe event stream as an
/// RSS-kind event named `label`.
///
/// Intended use: call `reset_peak()` at phase entry and this at phase exit,
/// so that `bytes` is the peak seen during the phase.
#[cfg(feature = "probe")]
#[inline]
pub fn sample_phase_peak(label: &'static str) {
    use std::sync::atomic::Ordering;

    Probe::sample_rss(label, PEAK_PHYS_FOOTPRINT.load(Ordering::Relaxed));
}

603

604

/// No-op stand-in used when the `probe` feature is off.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn reset_peak() {}

607

608

/// No-op stand-in used when the `probe` feature is off; the label is ignored.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn sample_phase_peak(_label: &'static str) {}

611

612

/// Stand-in used when the `probe` feature is off; always reports 0.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn malloc_heap_bytes() -> u64 {
    0
}

615

616

/// Append one `{"ev":"phase","call":C,"label":L,"heap":N}` line to the
/// JSONL file named by `AZ_PROFILE_OUT=<path>`. Only fires when
/// `AZ_PROFILE=heap,jsonl` is set *and* the path is given.
///
/// `call` is a counter that advances every time the label is exactly
/// `start`; any other label reuses the most recent value, so downstream
/// analyzers can group the phases of a single `regenerate_layout`
/// invocation.
///
/// `label` convention: `start` at function entry; `<step>` after each
/// phase completes; `end` at function exit. Heap Δ between adjacent
/// labels within the same call-id is the bytes retained by that phase.
/// The label is written verbatim, without JSON escaping.
///
/// Open and write failures are ignored. Zero overhead when flags aren't
/// set (two atomic loads). Zero overhead when the `probe` feature is off
/// (no-op stub).
#[cfg(feature = "probe")]
pub fn emit_phase_heap(label: &str) {
    use std::io::Write;
    use std::sync::atomic::{AtomicU64, Ordering};

    // Source of fresh call ids.
    static CALL_ID: AtomicU64 = AtomicU64::new(0);
    // Id of the invocation in progress; shared by every non-"start" label.
    static CURRENT_CALL: AtomicU64 = AtomicU64::new(0);

    if !heap_jsonl_enabled() {
        return;
    }
    let Some(p) = azul_core::profile::out_path() else {
        return;
    };

    let call_id = match label {
        "start" => {
            let fresh = CALL_ID.fetch_add(1, Ordering::Relaxed) + 1;
            CURRENT_CALL.store(fresh, Ordering::Relaxed);
            fresh
        }
        _ => CURRENT_CALL.load(Ordering::Relaxed),
    };

    // Measure before touching the file so the I/O is not part of the reading.
    let heap = malloc_heap_bytes();
    let opened = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(p);
    if let Ok(mut sink) = opened {
        let _ = writeln!(
            sink,
            r#"{{"ev":"phase","call":{},"label":"{}","heap":{}}}"#,
            call_id, label, heap
        );
    }
}

662

663

/// No-op stand-in used when the `probe` feature is off; the label is ignored.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn emit_phase_heap(_label: &str) {}

666

667

/// Like [`emit_phase_heap`] but attaches a numeric payload (e.g., a cache
/// size) to the JSONL record under the `"extra"` field.
///
/// Gated behind `AZ_PROFILE=heap,jsonl,detail` — the `detail` token opts
/// in to fine-grained probes that produce extra per-step records (one
/// per intermediate step inside a phase). Without `detail`, only the
/// coarser phase probes from [`emit_phase_heap`] fire.
///
/// Records written here always carry `"call":0`. The label is written
/// verbatim, without JSON escaping. Open and write failures are ignored.
#[cfg(feature = "probe")]
pub fn emit_phase_heap_extra(label: &str, extra: u64) {
    use std::io::Write;

    // All three switches must be on, and an output path must be configured.
    if !heap_jsonl_enabled() {
        return;
    }
    if !azul_core::profile::detail_enabled() {
        return;
    }
    let Some(p) = azul_core::profile::out_path() else {
        return;
    };

    // Measure before touching the file so the I/O is not part of the reading.
    let heap = malloc_heap_bytes();
    let opened = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(p);
    if let Ok(mut sink) = opened {
        let _ = writeln!(
            sink,
            r#"{{"ev":"phase","call":0,"label":"{}","heap":{},"extra":{}}}"#,
            label, heap, extra
        );
    }
}

693

694

/// No-op stand-in used when the `probe` feature is off; both arguments are
/// ignored.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn emit_phase_heap_extra(_label: &str, _extra: u64) {}

697

698

/// `true` only when the profile flags have both `heap` and `jsonl` set —
/// the combination that enables JSONL heap-probe emission. Either alone is
/// a no-op.
#[cfg(feature = "probe")]
#[inline]
fn heap_jsonl_enabled() -> bool {
    let profile_flags = azul_core::profile::flags();
    profile_flags.heap && profile_flags.jsonl
}

706

707

/// Forwards to `azul_core::profile::detail_enabled()`, i.e. reports whether
/// `AZ_PROFILE=detail` is active. Kept as a public re-export so downstream
/// crates can write `azul_layout::probe::detail_enabled()` without pulling
/// in `azul_core::profile` directly.
#[cfg(feature = "probe")]
#[inline]
pub fn detail_enabled() -> bool {
    azul_core::profile::detail_enabled()
}

715

716

/// Stand-in used when the `probe` feature is off; detail probes are never
/// enabled in that configuration.
#[cfg(not(feature = "probe"))]
#[inline(always)]
pub fn detail_enabled() -> bool {
    false
}