tests: Add benchmark for IPI performance

Adds tests to better gauge IPI performance on SMP. In each case, one
CPU is used as the source of IPIs while the remaining CPUs are kept busy
doing "work". Every 30 seconds the benchmark reports the amount of
"work" done by the busy CPUs along with the activity (preemptions
performed or IPIs issued) of the CPU generating the IPIs.

This can be used to ...
 1. Show how enabling IPI optimization affects system performance.
 2. Show the cost of spinlock contention as the number of CPUs increases.
 3. Measure the relative performance of scheduler changes on SMP.
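
The suite can be built and run under QEMU in the usual way. The commands
below assume the benchmark lands under tests/benchmarks/ipi_metric (an
assumed path; adjust it to wherever these files live). The test variant is
selected via the IPI_METRIC_TEST Kconfig choice and defaults to the
preemptive test:

  west build -b qemu_x86_64 -p auto tests/benchmarks/ipi_metric
  west build -t run

All four variants can also be collected through twister:

  ./scripts/twister -p qemu_x86_64 -T tests/benchmarks/ipi_metric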

Signed-off-by: Peter Mitsis <peter.mitsis@intel.com>

Commit 5c36567c56 by Peter Mitsis, 2025-02-13 14:53:20 -08:00
(committed by Benjamin Cabé)
10 changed files with 588 additions and 0 deletions

@@ -0,0 +1,26 @@
# SPDX-License-Identifier: Apache-2.0
cmake_minimum_required(VERSION 3.20.0)
find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
project(ipi_metric)
#FILE(GLOB app_sources src/*.c)
#target_sources(app PRIVATE ${app_sources})
target_sources_ifdef(
CONFIG_IPI_METRIC_PREEMPTIVE
app
PRIVATE
src/ipi_metric_preemptive.c
)
target_sources_ifdef(
CONFIG_IPI_METRIC_PRIMITIVE_BROADCAST
app
PRIVATE
src/ipi_metric_primitive.c
)
target_sources_ifdef(
CONFIG_IPI_METRIC_PRIMITIVE_DIRECTED
app
PRIVATE
src/ipi_metric_primitive.c
)

@@ -0,0 +1,37 @@
# Copyright (c) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
mainmenu "IPI-Metric RTOS Test Suite"
choice IPI_METRIC_TEST
prompt "Select an IPI-Metric test to execute"
default IPI_METRIC_PREEMPTIVE
help
The IPI-Metric benchmark suite has a single CPU in an SMP system
dedicated to generating IPIs under varying conditions while the
remaining CPUs perform their "work" and process IPIs. These tests
track the amount of "work" and the number of IPIs processed
during 30-second intervals.
config IPI_METRIC_PREEMPTIVE
bool "IPIs are generated due threads preempting one another"
help
The CPU generating the IPIs does so as a byproduct of resuming and
suspending a series of preemptible threads.
config IPI_METRIC_PRIMITIVE_BROADCAST
bool "IPIs are generated using primitive arch_sched_broadcast_ipi()"
help
The CPU generating the IPIs does so by directly calling
arch_sched_broadcast_ipi() to broadcast them to all CPUs.
config IPI_METRIC_PRIMITIVE_DIRECTED
bool "IPIs are generated using primitive arch_sched_directed_ipi()"
depends on ARCH_HAS_DIRECTED_IPIS
help
The CPU generating the IPIs does so by directly calling
arch_sched_directed_ipi() to direct them to a single CPU.
endchoice
source "Kconfig.zephyr"

@@ -0,0 +1,4 @@
# Copyright (c) 2022 Carlo Caione <ccaione@baylibre.com>
# SPDX-License-Identifier: Apache-2.0
CONFIG_MP_MAX_NUM_CPUS=4

@@ -0,0 +1,19 @@
/* Copyright 2022 Carlo Caione <ccaione@baylibre.com>
* SPDX-License-Identifier: Apache-2.0
*/
/ {
cpus {
cpu@2 {
device_type = "cpu";
compatible = "arm,cortex-a53";
reg = <2>;
};
cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a53";
reg = <3>;
};
};
};

@@ -0,0 +1 @@
CONFIG_MP_MAX_NUM_CPUS=4

@@ -0,0 +1,15 @@
/ {
cpus {
cpu@2 {
device_type = "cpu";
compatible = "intel,x86_64";
reg = <2>;
};
cpu@3 {
device_type = "cpu";
compatible = "intel,x86_64";
reg = <3>;
};
};
};

@@ -0,0 +1,31 @@
# Default base configuration file
# Use a tickless kernel to minimize the number of timer interrupts
CONFIG_TICKLESS_KERNEL=y
CONFIG_SYS_CLOCK_TICKS_PER_SEC=100
# Optimize for speed
CONFIG_SPEED_OPTIMIZATIONS=y
# Disable time slicing
CONFIG_TIMESLICING=n
# Disabling hardware stack protection can greatly
# improve system performance.
CONFIG_HW_STACK_PROTECTION=n
# Picolibc's memcpy is faster than Zephyr's minimal libc memcpy
CONFIG_PICOLIBC_SPEED_OPTIMIZATIONS=y
CONFIG_PICOLIBC_USE_MODULE=y
# Disable Thread Local Storage for better context switching times
CONFIG_THREAD_LOCAL_STORAGE=n
# Disable memory slab pointer validation
CONFIG_MEM_SLAB_POINTER_VALIDATE=n
# Allow for the number of scheduling IPIs to be tracked
CONFIG_TRACE_SCHED_IPI=y
# Enable smarter delivery of scheduling IPIs
CONFIG_IPI_OPTIMIZE=y

@@ -0,0 +1,173 @@
/*
* Copyright (c) 2023,2024 Intel Corporation.
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdio.h>
#include <zephyr/kernel.h>
#if CONFIG_MP_MAX_NUM_CPUS == 1
#error "Test requires a system with more than 1 CPU"
#endif
#define IPI_TEST_INTERVAL_DURATION 30
#define NUM_WORK_THREADS (CONFIG_MP_MAX_NUM_CPUS - 1)
#define WORK_STACK_SIZE 4096
#define NUM_PREEMPTIVE_THREADS 5
#define PREEMPTIVE_STACK_SIZE 4096
static K_THREAD_STACK_ARRAY_DEFINE(work_stack, NUM_WORK_THREADS, WORK_STACK_SIZE);
static K_THREAD_STACK_ARRAY_DEFINE(preemptive_stack, NUM_PREEMPTIVE_THREADS, PREEMPTIVE_STACK_SIZE);
static struct k_thread work_thread[NUM_WORK_THREADS];
static unsigned long work_array[NUM_WORK_THREADS][1024];
static volatile unsigned long work_counter[NUM_WORK_THREADS];
static struct k_thread preemptive_thread[NUM_PREEMPTIVE_THREADS];
static unsigned int preemptive_counter[NUM_PREEMPTIVE_THREADS];
static atomic_t ipi_counter;
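/*
 * With CONFIG_TRACE_SCHED_IPI=y the kernel calls z_trace_sched_ipi() on
 * every CPU that services a scheduling IPI; the benchmark supplies the
 * hook's definition so that each serviced IPI increments ipi_counter.
 */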
void z_trace_sched_ipi(void)
{
atomic_inc(&ipi_counter);
}
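/*
 * "Work" loop run on each busy CPU: every pass bumps that CPU's work
 * counter and mixes it into a 1024-entry array so the CPU performs real
 * memory traffic between the scheduling IPIs it services.
 */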
void work_entry(void *p1, void *p2, void *p3)
{
unsigned int index = POINTER_TO_UINT(p1);
unsigned long *array = p2;
unsigned long counter;
while (1) {
for (unsigned int i = 0; i < 1024; i++) {
counter = work_counter[index]++;
array[i] = (array[i] + counter) ^ array[i];
}
}
}
void preemptive_entry(void *p1, void *p2, void *p3)
{
unsigned int index = POINTER_TO_UINT(p1);
ARG_UNUSED(p2);
ARG_UNUSED(p3);
struct k_thread *suspend = NULL;
struct k_thread *resume = NULL;
if (index != (NUM_PREEMPTIVE_THREADS - 1)) {
resume = &preemptive_thread[index + 1];
}
if (index != 0) {
suspend = k_current_get();
}
while (1) {
if (resume != NULL) {
k_thread_resume(resume);
}
preemptive_counter[index]++;
if (suspend != NULL) {
k_thread_suspend(suspend);
}
}
}
void report(void)
{
unsigned int elapsed_time = IPI_TEST_INTERVAL_DURATION;
unsigned long total_preempt;
unsigned long total_work;
unsigned long last_work_counter[NUM_WORK_THREADS] = {};
unsigned long last_preempt[NUM_PREEMPTIVE_THREADS] = {};
unsigned long tmp_work_counter[NUM_WORK_THREADS] = {};
unsigned long tmp_preempt[NUM_PREEMPTIVE_THREADS] = {};
unsigned int i;
unsigned int tmp_ipi_counter;
atomic_set(&ipi_counter, 0);
while (1) {
k_sleep(K_SECONDS(IPI_TEST_INTERVAL_DURATION));
/*
* Get local copies of the counters to minimize
* the impacts of delays from printf().
*/
total_work = 0;
for (i = 0; i < NUM_WORK_THREADS; i++) {
tmp_work_counter[i] = work_counter[i];
total_work += (tmp_work_counter[i] - last_work_counter[i]);
}
/* Sum the preemptive counters. */
total_preempt = 0;
for (i = 0; i < NUM_PREEMPTIVE_THREADS; i++) {
tmp_preempt[i] = preemptive_counter[i];
total_preempt += (tmp_preempt[i] - last_preempt[i]);
}
tmp_ipi_counter = (unsigned int)atomic_set(&ipi_counter, 0);
printf("**** IPI-Metric Basic Scheduling Test **** Elapsed Time: %u\n",
elapsed_time);
printf(" Preemptive Counter Total: %lu\n", total_preempt);
for (i = 0; i < NUM_PREEMPTIVE_THREADS; i++) {
printf(" - Counter #%u: %lu\n",
i, tmp_preempt[i] - last_preempt[i]);
last_preempt[i] = tmp_preempt[i];
}
printf(" IPI Count: %u\n", tmp_ipi_counter);
printf(" Total Work: %lu\n", total_work);
for (i = 0; i < NUM_WORK_THREADS; i++) {
printf(" - Work Counter #%u: %lu\n",
i, tmp_work_counter[i] - last_work_counter[i]);
last_work_counter[i] = tmp_work_counter[i];
}
elapsed_time += IPI_TEST_INTERVAL_DURATION;
}
}
int main(void)
{
unsigned int i;
for (i = 0; i < NUM_WORK_THREADS; i++) {
k_thread_create(&work_thread[i], work_stack[i],
WORK_STACK_SIZE, work_entry,
UINT_TO_POINTER(i), work_array[i], NULL,
-1, 0, K_NO_WAIT);
}
/*
* Create the preemptive threads and switch them to
* the suspended state.
*/
for (i = 0; i < NUM_PREEMPTIVE_THREADS; i++) {
k_thread_create(&preemptive_thread[i], preemptive_stack[i],
PREEMPTIVE_STACK_SIZE, preemptive_entry,
UINT_TO_POINTER(i), NULL, NULL,
10 - i, 0, K_FOREVER);
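/*
 * The thread was created with a K_FOREVER start delay. Suspending it and
 * then waking it cancels that delay while leaving the thread suspended,
 * so a later k_thread_resume() (from main() or preemptive_entry()) is
 * all that is needed to make it runnable.
 */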
k_thread_suspend(&preemptive_thread[i]);
k_wakeup(&preemptive_thread[i]);
}
k_thread_resume(&preemptive_thread[0]);
report();
}

@@ -0,0 +1,176 @@
/*
* Copyright (c) 2025 Intel Corporation.
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdio.h>
#include <zephyr/kernel.h>
#if CONFIG_MP_MAX_NUM_CPUS <= 1
#error "Test requires a system with more than 1 CPU"
#endif
#define IPI_TEST_INTERVAL_DURATION 30
#define NUM_WORK_THREADS (CONFIG_MP_MAX_NUM_CPUS - 1)
#define WORK_STACK_SIZE 4096
#define PRIMITIVE_STACK_SIZE 4096
static K_THREAD_STACK_ARRAY_DEFINE(work_stack, NUM_WORK_THREADS, WORK_STACK_SIZE);
static K_THREAD_STACK_DEFINE(primitive_stack, PRIMITIVE_STACK_SIZE);
static struct k_thread work_thread[NUM_WORK_THREADS];
static unsigned long work_array[NUM_WORK_THREADS][1024];
static volatile unsigned long work_counter[NUM_WORK_THREADS];
static struct k_thread primitive_thread;
static volatile unsigned long primitives_issued;
static atomic_t ipi_cpu_bitmap;
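/*
 * Hook invoked by the kernel (CONFIG_TRACE_SCHED_IPI=y) on every CPU that
 * services a scheduling IPI. Each CPU sets its bit in ipi_cpu_bitmap so the
 * issuing thread can tell which CPUs actually received an IPI.
 */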
void z_trace_sched_ipi(void)
{
atomic_or(&ipi_cpu_bitmap, BIT(_current_cpu->id));
}
void work_entry(void *p1, void *p2, void *p3)
{
unsigned int index = POINTER_TO_UINT(p1);
unsigned long *array = p2;
unsigned long counter;
while (1) {
for (unsigned int i = 0; i < 1024; i++) {
counter = work_counter[index]++;
array[i] = (array[i] + counter) ^ array[i];
}
}
}
void primitive_entry(void *p1, void *p2, void *p3)
{
unsigned int desired_ipi_set;
unsigned int value;
int key;
ARG_UNUSED(p1);
ARG_UNUSED(p2);
ARG_UNUSED(p3);
/*
* All other CPUs are executing cooperative threads and are not
* expected to switch in a new thread. Select a CPU targeted for IPIs.
*/
#ifdef CONFIG_IPI_METRIC_PRIMITIVE_DIRECTED
key = arch_irq_lock();
desired_ipi_set = (_current_cpu->id == 0) ? BIT(1) : BIT(0);
arch_irq_unlock(key);
#else
desired_ipi_set = (1 << arch_num_cpus()) - 1;
key = arch_irq_lock();
desired_ipi_set ^= BIT(_current_cpu->id);
arch_irq_unlock(key);
#endif
while (1) {
atomic_set(&ipi_cpu_bitmap, 0);
#ifdef CONFIG_IPI_METRIC_PRIMITIVE_DIRECTED
arch_sched_directed_ipi(desired_ipi_set);
#else
arch_sched_broadcast_ipi();
#endif
primitives_issued++;
/*
* Loop until all the expected CPUs have flagged that they
* have processed the schedule IPI from above.
*/
while (1) {
value = (unsigned int)atomic_get(&ipi_cpu_bitmap);
/*
* Note: z_trace_sched_ipi(), which is used to track
* which CPUs processed an IPI, is not just called as a
* result of the primitives arch_sched_directed_ipi()
* or arch_sched_broadcast_ipi() above. Schedule IPIs
* will also be sent when ticks are announced such as
* when the k_sleep() in report() expires and this
* benchmark can not control which CPUs will receive
* those IPIs. To account for this, a mask is applied.
*/
if ((value & desired_ipi_set) == desired_ipi_set) {
break;
}
key = arch_irq_lock();
arch_spin_relax();
arch_irq_unlock(key);
}
}
}
void report(void)
{
unsigned int elapsed_time = IPI_TEST_INTERVAL_DURATION;
unsigned int i;
unsigned long total;
unsigned long counter[NUM_WORK_THREADS];
unsigned long last_counter[NUM_WORK_THREADS] = {};
unsigned long last_issued = 0;
unsigned long interval_issued;
while (1) {
k_sleep(K_SECONDS(IPI_TEST_INTERVAL_DURATION));
total = 0;
for (i = 0; i < NUM_WORK_THREADS; i++) {
counter[i] = work_counter[i] - last_counter[i];
total += counter[i];
last_counter[i] = work_counter[i];
}
interval_issued = primitives_issued - last_issued;
printf("**** IPI-Metric %s IPI Test **** Elapsed Time: %u\n",
IS_ENABLED(CONFIG_IPI_METRIC_PRIMITIVE_DIRECTED) ?
"Directed" : "Broadcast", elapsed_time);
printf(" Schedule IPIs Issued: %lu\n", interval_issued);
last_issued = primitives_issued;
printf(" Total Work: %lu\n", total);
for (i = 0; i < NUM_WORK_THREADS; i++) {
printf(" - Work Counter #%u: %lu\n",
i, counter[i]);
}
elapsed_time += IPI_TEST_INTERVAL_DURATION;
}
}
int main(void)
{
unsigned int i;
for (i = 0; i < NUM_WORK_THREADS; i++) {
k_thread_create(&work_thread[i], work_stack[i],
WORK_STACK_SIZE, work_entry,
UINT_TO_POINTER(i), work_array[i], NULL,
-1, 0, K_NO_WAIT);
}
/* Create the primitive thread. */
k_thread_create(&primitive_thread, primitive_stack,
PRIMITIVE_STACK_SIZE, primitive_entry,
UINT_TO_POINTER(i), NULL, NULL,
10, 0, K_NO_WAIT);
report();
}

@@ -0,0 +1,106 @@
common:
platform_key:
- arch
tags:
- kernel
- benchmark
# Native platforms excluded as they are not relevant: These benchmarks run some kernel primitives
# in a loop during a predefined time counting how many times they execute. But in the POSIX arch,
# time does not pass while the CPU executes. So the benchmark just appears as if hung.
arch_exclude:
- posix
# some slow qemu_* excluded
platform_exclude:
- qemu_malta/qemu_malta
- qemu_malta/qemu_malta/be
- qemu_nios2
integration_platforms:
- qemu_x86_64
- qemu_cortex_a53/qemu_cortex_a53/smp
timeout: 300
filter: CONFIG_SMP and CONFIG_MP_MAX_NUM_CPUS > 1
harness: console
tests:
benchmark.ipi_metric.preemptive.broadcast:
extra_configs:
- CONFIG_IPI_METRIC_PREEMPTIVE=y
- CONFIG_IPI_OPTIMIZE=n
harness_config:
type: multi_line
ordered: true
regex:
# Collect at least 3 measurements for each benchmark:
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
benchmark.ipi_metric.preemptive.optimize:
extra_configs:
- CONFIG_IPI_METRIC_PREEMPTIVE=y
- CONFIG_IPI_OPTIMIZE=y
filter: CONFIG_ARCH_HAS_DIRECTED_IPIS
harness_config:
type: multi_line
ordered: true
regex:
# Collect at least 3 measurements for each benchmark:
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
benchmark.ipi_metric.primitive.broadcast:
extra_configs:
- CONFIG_IPI_METRIC_PRIMITIVE_BROADCAST=y
harness_config:
type: multi_line
ordered: true
regex:
# Collect at least 3 measurements for each benchmark:
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
benchmark.ipi_metric.primitive.directed:
extra_configs:
- CONFIG_IPI_METRIC_PRIMITIVE_DIRECTED=y
filter: CONFIG_ARCH_HAS_DIRECTED_IPIS
harness_config:
type: multi_line
ordered: true
regex:
# Collect at least 3 measurements for each benchmark:
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"