/* Autogenerated file, DO NOT EDIT manually! generated by perf-metricset-codegen.py
 *
 * Copyright (c) 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <assert.h>

#include "i915_drm.h"

#include "i915_perf_metrics_kblgt2.h"
#include "i915_perf_equations.h"
#include "i915_perf_registers_kblgt2.h"

static void
kblgt2_add_render_basic_metric_set(struct intel_perf *perf)
{
    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    metric_set->name = "Render Metrics Basic set";
    metric_set->symbol_name = "RenderBasic";
    metric_set->hw_config_guid = "99c1a40e-a090-4354-86e3-4d068bb1917e";
    metric_set->counters = calloc(52, sizeof(struct intel_perf_logical_counter));
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_render_basic_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);


    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__render_basic__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__render_basic__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_basic__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_basic__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_basic__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__render_basic__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__render_basic__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_basic__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI Depth Throughput";
    counter->symbol_name = "GtiDepthThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_basic__gti_depth_throughput__read;
    counter->max_uint64 = kblgt2__render_basic__gti_depth_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI/Depth Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI HDC TLB Lookup Throughput";
    counter->symbol_name = "GtiHdcLookupsThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_basic__gti_hdc_lookups_throughput__read;
    counter->max_uint64 = kblgt2__render_basic__gti_hdc_lookups_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI L3 Throughput";
    counter->symbol_name = "GtiL3Throughput";
    counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_basic__gti_l3_throughput__read;
    counter->max_uint64 = kblgt2__render_basic__gti_l3_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI RCC Throughput";
    counter->symbol_name = "GtiRccThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_basic__gti_rcc_throughput__read;
    counter->max_uint64 = kblgt2__render_basic__gti_rcc_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI/Color Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI Read Throughput";
    counter->symbol_name = "GtiReadThroughput";
    counter->desc = "The total number of GPU memory bytes read from GTI.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_basic__gti_read_throughput__read;
    counter->max_uint64 = kblgt2__render_basic__gti_read_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI Fixed Pipe Throughput";
    counter->symbol_name = "GtiVfThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_basic__gti_vf_throughput__read;
    counter->max_uint64 = kblgt2__render_basic__gti_vf_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI/3D Pipe");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI Write Throughput";
    counter->symbol_name = "GtiWriteThroughput";
    counter->desc = "The total number of GPU memory bytes written to GTI.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_basic__gti_write_throughput__read;
    counter->max_uint64 = kblgt2__render_basic__gti_write_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_basic__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_basic__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Lookup Accesses w/o IC";
    counter->symbol_name = "L3Lookups";
    counter->desc = "The total number of L3 cache lookup accesses w/o IC.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__render_basic__l3_lookups__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/TAG");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Misses";
    counter->symbol_name = "L3Misses";
    counter->desc = "The total number of L3 misses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__render_basic__l3_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/TAG");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Sampler Throughput";
    counter->symbol_name = "L3SamplerThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_basic__l3_sampler_throughput__read;
    counter->max_uint64 = kblgt2__render_basic__l3_sampler_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Sampler");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_basic__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__render_basic__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_basic__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Both FPU Active";
    counter->symbol_name = "PsEuBothFpuActive";
    counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__ps_eu_both_fpu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU0 Pipe Active";
    counter->symbol_name = "PsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__ps_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU1 Pipe Active";
    counter->symbol_name = "PsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__ps_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS Send Pipeline Active";
    counter->symbol_name = "PsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__ps_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_basic__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_basic__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    if (perf->devinfo.subslice_mask & 0x09) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Sampler 0 Bottleneck";
        counter->symbol_name = "Sampler0Bottleneck";
        counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests.";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__render_basic__sampler0_bottleneck__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "Sampler");
    }

    if (perf->devinfo.subslice_mask & 0x09) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Sampler 0 Busy";
        counter->symbol_name = "Sampler0Busy";
        counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests.";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__render_basic__sampler0_busy__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "Sampler");
    }

    if (perf->devinfo.subslice_mask & 0x12) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Sampler 1 Bottleneck";
        counter->symbol_name = "Sampler1Bottleneck";
        counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests.";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__render_basic__sampler1_bottleneck__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "Sampler");
    }

    if (perf->devinfo.subslice_mask & 0x12) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Sampler 1 Busy";
        counter->symbol_name = "Sampler1Busy";
        counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests.";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__render_basic__sampler1_busy__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "Sampler");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samplers Bottleneck";
    counter->symbol_name = "SamplerBottleneck";
    counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__sampler_bottleneck__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "Sampler");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Cache Misses";
    counter->symbol_name = "SamplerL1Misses";
    counter->desc = "The total number of sampler cache misses in all LODs in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__render_basic__sampler_l1_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__render_basic__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__render_basic__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samplers Busy";
    counter->symbol_name = "SamplersBusy";
    counter->desc = "The percentage of time in which samplers have been processing EU requests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__samplers_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "Sampler");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_basic__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_basic__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_basic__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__render_basic__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__render_basic__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__render_basic__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_basic__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__render_basic__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_basic__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__render_basic__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU0 Pipe Active";
    counter->symbol_name = "VsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__vs_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU1 Pipe Active";
    counter->symbol_name = "VsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__vs_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Send Pipe Active";
    counter->symbol_name = "VsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_basic__vs_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_basic__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    assert(metric_set->n_counters <= 52);
}

/*
 * Build the "ComputeBasic" metric set and register it, together with its
 * logical counters, on @perf.
 *
 * The function allocates the metric set and its counter array, fills in the
 * OA report layout fields, calls kblgt2_compute_basic_add_registers() and
 * intel_perf_add_metric_set(), and then initialises each counter in turn,
 * handing it to intel_perf_add_logical_counter() with a group string.
 *
 * If either allocation fails the function returns without registering
 * anything (nothing has been handed to @perf at that point, so the partial
 * allocation is released here).
 */
static void
kblgt2_add_compute_basic_metric_set(struct intel_perf *perf)
{
    /* Capacity of the counter array; must cover every counter added below. */
    enum { COMPUTE_BASIC_N_COUNTERS = 39 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL)
        return;

    metric_set->name = "Compute Metrics Basic set";
    metric_set->symbol_name = "ComputeBasic";
    metric_set->hw_config_guid = "12994724-3a5a-4303-bb3c-ba0175d2c200";
    metric_set->counters = calloc(COMPUTE_BASIC_N_COUNTERS,
                                  sizeof(*metric_set->counters));
    if (metric_set->counters == NULL) {
        /* Not yet registered with perf, so it is still ours to release. */
        free(metric_set);
        return;
    }
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /*
     * Report layout: the B, C and perfcnt offsets are derived from the
     * preceding section's offset (A + 36, B + 8, C + 8).
     * NOTE(review): presumably these are indices into the accumulated
     * counter array rather than byte offsets -- confirm against the reader.
     */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_compute_basic_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    /*
     * Counters. Each one takes the next free slot of metric_set->counters;
     * counters without a known maximum leave their max callback NULL.
     */

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__compute_basic__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__compute_basic__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_basic__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_basic__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_basic__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_basic__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU AVG IPC Rate";
    counter->symbol_name = "EuAvgIpcRate";
    counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NUMBER;
    counter->read_float = kblgt2__compute_basic__eu_avg_ipc_rate__read;
    counter->max_float = kblgt2__compute_basic__eu_avg_ipc_rate__max;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_basic__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Send Pipe Active";
    counter->symbol_name = "EuSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_basic__eu_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_basic__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Thread Occupancy";
    counter->symbol_name = "EuThreadOccupancy";
    counter->desc = "The percentage of time in which hardware threads occupied EUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_basic__eu_thread_occupancy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU0 Pipe Active";
    counter->symbol_name = "Fpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_basic__fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU1 Pipe Active";
    counter->symbol_name = "Fpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_basic__fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_basic__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__compute_basic__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__compute_basic__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_basic__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI Read Throughput";
    counter->symbol_name = "GtiReadThroughput";
    counter->desc = "The total number of GPU memory bytes read from GTI.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_basic__gti_read_throughput__read;
    counter->max_uint64 = kblgt2__compute_basic__gti_read_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI Write Throughput";
    counter->symbol_name = "GtiWriteThroughput";
    counter->desc = "The total number of GPU memory bytes written to GTI.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_basic__gti_write_throughput__read;
    counter->max_uint64 = kblgt2__compute_basic__gti_write_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_basic__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_basic__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_basic__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__compute_basic__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_basic__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_basic__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_basic__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__compute_basic__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__compute_basic__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_basic__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_basic__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_basic__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__compute_basic__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__compute_basic__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__compute_basic__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_basic__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__compute_basic__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_basic__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__compute_basic__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Typed Bytes Read";
    counter->symbol_name = "TypedBytesRead";
    counter->desc = "The total number of typed memory bytes read via Data Port.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_basic__typed_bytes_read__read;
    counter->max_uint64 = kblgt2__compute_basic__typed_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Typed Bytes Written";
    counter->symbol_name = "TypedBytesWritten";
    /* Description corrected: this counter reports typed (not untyped) writes. */
    counter->desc = "The total number of typed memory bytes written via Data Port.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_basic__typed_bytes_written__read;
    counter->max_uint64 = kblgt2__compute_basic__typed_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Untyped Bytes Read";
    counter->symbol_name = "UntypedBytesRead";
    /* Description corrected: this counter reports untyped (not typed) reads. */
    counter->desc = "The total number of untyped memory bytes read via Data Port.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_basic__untyped_bytes_read__read;
    counter->max_uint64 = kblgt2__compute_basic__untyped_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Untyped Writes";
    counter->symbol_name = "UntypedBytesWritten";
    counter->desc = "The total number of untyped memory bytes written via Data Port.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_basic__untyped_bytes_written__read;
    counter->max_uint64 = kblgt2__compute_basic__untyped_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_basic__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /* Sanity check that the counter array allocated above was not overrun. */
    assert(metric_set->n_counters <= COMPUTE_BASIC_N_COUNTERS);
}

/*
 * Build the "RenderPipeProfile" metric set for kblgt2 and register it on
 * @perf together with its logical counters.
 *
 * The metric set and its counter array are heap-allocated here and are not
 * freed by this function once intel_perf_add_metric_set() has been called;
 * presumably @perf keeps referencing them afterwards (confirm against
 * intel_perf_add_metric_set()).
 *
 * If either allocation fails, nothing is registered and the function returns
 * quietly (it has no way to report an error to its caller).
 *
 * NOTE(review): this file is generated by perf-metricset-codegen.py, so the
 * allocation checks and the corrected "HS Stall" description should also be
 * applied to the generator / its input, otherwise they will be lost on the
 * next regeneration.
 */
static void
kblgt2_add_render_pipe_profile_metric_set(struct intel_perf *perf)
{
    /* Capacity of the counter array; shared by the allocation and the final
     * sanity check so the two cannot drift apart. */
    enum { N_RENDER_PIPE_PROFILE_COUNTERS = 43 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (!metric_set)
        return;

    metric_set->counters = calloc(N_RENDER_PIPE_PROFILE_COUNTERS,
                                  sizeof(*metric_set->counters));
    if (!metric_set->counters) {
        /* Nothing has been handed to @perf yet, so the set is still ours. */
        free(metric_set);
        return;
    }

    metric_set->name = "Render Metrics set for 3D Pipeline Profile";
    metric_set->symbol_name = "RenderPipeProfile";
    metric_set->hw_config_guid = "d7a17a3a-ca71-40d2-a919-ace80d50633f";
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /* Report layout: each section starts where the previous one ends. The
     * 36/8/8 strides match the 32+4 A, 8 B and 8 C counters named in the OA
     * format above. */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_render_pipe_profile_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    /* Each stanza below claims the next free slot of the counter array,
     * fills in its description and accessors, and registers it under the
     * given group path. */

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__render_pipe_profile__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__render_pipe_profile__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "BC Bottleneck";
    counter->symbol_name = "BcBottleneck";
    counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__bc_bottleneck__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Barycentric Calc");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Clipper Bottleneck";
    counter->symbol_name = "ClBottleneck";
    counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__cl_bottleneck__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Clipper");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CL Stall";
    counter->symbol_name = "ClStall";
    counter->desc = "The percentage of time in which clipper pipeline stage was stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__cl_stall__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Clipper");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_pipe_profile__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Bottleneck";
    counter->symbol_name = "DsBottleneck";
    counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__ds_bottleneck__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Stall";
    counter->symbol_name = "DsStall";
    counter->desc = "The percentage of time in which domain shader pipeline stage was stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__ds_stall__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_pipe_profile__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Bottleneck";
    counter->symbol_name = "EarlyDepthBottleneck";
    counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__early_depth_bottleneck__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_pipe_profile__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__render_pipe_profile__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__render_pipe_profile__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Bottleneck";
    counter->symbol_name = "GsBottleneck";
    counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__gs_bottleneck__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_pipe_profile__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Hi-Depth Bottleneck";
    counter->symbol_name = "HiDepthBottleneck";
    counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__hi_depth_bottleneck__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_pipe_profile__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Bottleneck";
    counter->symbol_name = "HsBottleneck";
    counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__hs_bottleneck__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Stall";
    counter->symbol_name = "HsStall";
    /* Description previously read "hull stall pipeline stage"; the stage is
     * the hull shader, as in the HS Bottleneck counter above. */
    counter->desc = "The percentage of time in which hull shader pipeline stage was stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__hs_stall__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_pipe_profile__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_pipe_profile__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__render_pipe_profile__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_pipe_profile__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_pipe_profile__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_pipe_profile__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__render_pipe_profile__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__render_pipe_profile__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_pipe_profile__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_pipe_profile__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__render_pipe_profile__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Strip-Fans Bottleneck";
    counter->symbol_name = "SfBottleneck";
    counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__sf_bottleneck__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Strip-Fans");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SF Stall";
    counter->symbol_name = "SfStall";
    counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__sf_stall__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Strip-Fans");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__render_pipe_profile__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__render_pipe_profile__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__render_pipe_profile__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_pipe_profile__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__render_pipe_profile__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__render_pipe_profile__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__render_pipe_profile__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SO Bottleneck";
    counter->symbol_name = "SoBottleneck";
    counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__so_bottleneck__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Stream Output");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SO Stall";
    counter->symbol_name = "SoStall";
    counter->desc = "The percentage of time in which stream-output pipeline stage was stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__so_stall__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Stream Output");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VF Bottleneck";
    counter->symbol_name = "VfBottleneck";
    counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__vf_bottleneck__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Input Assembler");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Bottleneck";
    counter->symbol_name = "VsBottleneck";
    counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__render_pipe_profile__vs_bottleneck__read;
    counter->max_float = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__render_pipe_profile__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /* Sanity check that the stanzas above did not outgrow the array. */
    assert(metric_set->n_counters <= N_RENDER_PIPE_PROFILE_COUNTERS);
}

/*
 * Build the "MemoryReads" metric set and hand it, together with its logical
 * counters, to @perf.
 *
 * The function allocates one intel_perf_metric_set plus a zeroed array of
 * logical counters, fills in the OA report format and offsets, lets
 * kblgt2_memory_reads_add_registers() attach the register configuration,
 * passes the set to intel_perf_add_metric_set() and finally describes each
 * counter and passes it to intel_perf_add_logical_counter() under its group
 * path.
 *
 * If either allocation fails, the function returns without adding anything
 * to @perf; the partially built set is freed first.
 */
static void
kblgt2_add_memory_reads_metric_set(struct intel_perf *perf)
{
    /* Capacity of the counters array; must cover every counter added below. */
    enum { MEMORY_READS_MAX_COUNTERS = 41 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL)
        return;

    metric_set->name = "Memory Reads Distribution metrics set";
    metric_set->symbol_name = "MemoryReads";
    metric_set->hw_config_guid = "57b59202-172b-477a-87de-33f85572c589";
    metric_set->counters = calloc(MEMORY_READS_MAX_COUNTERS,
                                  sizeof(*metric_set->counters));
    if (metric_set->counters == NULL) {
        /* Nothing has been passed to perf yet, so the set is still ours to release. */
        free(metric_set);
        return;
    }
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /*
     * Offsets are chained: B follows 36 A slots, C follows 8 B slots and
     * perfcnt follows 8 C slots.
     * NOTE(review): the 36/8/8 split presumably mirrors the A32+A4/B8/C8
     * report format named above -- confirm against the OA format definition.
     */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_memory_reads_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    /*
     * Each stanza below claims the next free slot of the (zeroed) counters
     * array, fills in its description and read/max callbacks, and passes it
     * to intel_perf_add_logical_counter() with its group path.
     */

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__memory_reads__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__memory_reads__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_reads__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_reads__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_reads__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__memory_reads__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__memory_reads__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__memory_reads__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__memory_reads__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__memory_reads__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_reads__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiCmdStreamerMemoryReads";
    counter->symbol_name = "GtiCmdStreamerMemoryReads";
    counter->desc = "The total number of GTI memory reads from Command Streamer.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_cmd_streamer_memory_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/3D Pipe/Command Streamer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiHizMemoryReads";
    counter->symbol_name = "GtiHizMemoryReads";
    counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_hiz_memory_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/Depth Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiL3Bank0Reads";
    counter->symbol_name = "GtiL3Bank0Reads";
    counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_l3_bank0_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiL3Bank1Reads";
    counter->symbol_name = "GtiL3Bank1Reads";
    counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_l3_bank1_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiL3Bank2Reads";
    counter->symbol_name = "GtiL3Bank2Reads";
    counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_l3_bank2_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiL3Bank3Reads";
    counter->symbol_name = "GtiL3Bank3Reads";
    counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_l3_bank3_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiL3Reads";
    counter->symbol_name = "GtiL3Reads";
    counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_l3_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiMemoryReads";
    counter->symbol_name = "GtiMemoryReads";
    counter->desc = "The total number of GTI memory reads.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_memory_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiMscMemoryReads";
    counter->symbol_name = "GtiMscMemoryReads";
    counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_msc_memory_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/Color Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiRccMemoryReads";
    counter->symbol_name = "GtiRccMemoryReads";
    counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_rcc_memory_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/Color Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiRczMemoryReads";
    counter->symbol_name = "GtiRczMemoryReads";
    counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_rcz_memory_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/Depth Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiRingAccesses";
    counter->symbol_name = "GtiRingAccesses";
    counter->desc = "The total number of all accesses from GTI to the ring.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_ring_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiRsMemoryReads";
    counter->symbol_name = "GtiRsMemoryReads";
    counter->desc = "The total number of GTI memory reads from Resource Streamer.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_rs_memory_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/3D Pipe/Resource Streamer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiStcMemoryReads";
    counter->symbol_name = "GtiStcMemoryReads";
    counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_stc_memory_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/Depth Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiVfMemoryReads";
    counter->symbol_name = "GtiVfMemoryReads";
    counter->desc = "The total number of GTI memory reads from Vertex Fetch.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__gti_vf_memory_reads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/3D Pipe/Vertex Fetch");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_reads__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_reads__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_reads__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_reads__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_reads__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__memory_reads__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__memory_reads__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_reads__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_reads__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_reads__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_reads__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__memory_reads__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__memory_reads__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__memory_reads__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__memory_reads__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_reads__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /* Sanity check: the stanzas above must not exceed the allocated capacity. */
    assert(metric_set->n_counters <= MEMORY_READS_MAX_COUNTERS);
}

/*
 * Build the "MemoryWrites" metric set and register it with @perf.
 *
 * The function allocates the metric set and room for 41 logical counters,
 * fills in the OA report format and the offsets of the report sections,
 * lets kblgt2_memory_writes_add_registers() attach the register
 * configuration, and hands the set to intel_perf_add_metric_set().  It then
 * fills every counter descriptor in place and registers each one under its
 * group path through intel_perf_add_logical_counter().
 *
 * If either allocation fails the function returns without registering
 * anything, so @perf is left without this metric set instead of the
 * process dereferencing a NULL pointer.
 */
static void
kblgt2_add_memory_writes_metric_set(struct intel_perf *perf)
{
    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL)
        return;

    metric_set->name = "Memory Writes Distribution metrics set";
    metric_set->symbol_name = "MemoryWrites";
    metric_set->hw_config_guid = "3addf8ef-8e9b-40f5-a448-3dbb5d5128b0";

    /* Must stay in sync with the number of counters filled in below (41). */
    metric_set->counters = calloc(41, sizeof(struct intel_perf_logical_counter));
    if (metric_set->counters == NULL) {
        /* Nothing has been handed to perf yet, so the set can simply be dropped. */
        free(metric_set);
        return;
    }
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /*
     * Report layout: GPU time at 0, GPU clock at 1, then the A, B and C
     * sections back to back (36, 8 and 8 entries, matching the
     * A32u40_A4u32_B8_C8 format name), followed by the perfcnt section.
     */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    /*
     * The set is registered before its counters are described; the counter
     * array is then filled in place, one slot per block below.
     */
    kblgt2_memory_writes_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__memory_writes__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__memory_writes__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_writes__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_writes__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_writes__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__memory_writes__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__memory_writes__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__memory_writes__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__memory_writes__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__memory_writes__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_writes__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiCmdStreamerMemoryWrites";
    counter->symbol_name = "GtiCmdStreamerMemoryWrites";
    counter->desc = "The total number of GTI memory writes from Command Streamer.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_cmd_streamer_memory_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/3D Pipe/Command Streamer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiHizMemoryWrites";
    counter->symbol_name = "GtiHizMemoryWrites";
    counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_hiz_memory_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/Depth Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiL3Bank0Writes";
    counter->symbol_name = "GtiL3Bank0Writes";
    counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_l3_bank0_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiL3Bank1Writes";
    counter->symbol_name = "GtiL3Bank1Writes";
    counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_l3_bank1_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiL3Bank2Writes";
    counter->symbol_name = "GtiL3Bank2Writes";
    counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_l3_bank2_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiL3Bank3Writes";
    counter->symbol_name = "GtiL3Bank3Writes";
    counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_l3_bank3_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiL3Writes";
    counter->symbol_name = "GtiL3Writes";
    counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_l3_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiMemoryWrites";
    counter->symbol_name = "GtiMemoryWrites";
    counter->desc = "The total number of GTI memory writes.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_memory_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiMscMemoryWrites";
    counter->symbol_name = "GtiMscMemoryWrites";
    counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_msc_memory_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/Color Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiRccMemoryWrites";
    counter->symbol_name = "GtiRccMemoryWrites";
    counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations).";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_rcc_memory_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/Color Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiRczMemoryWrites";
    counter->symbol_name = "GtiRczMemoryWrites";
    counter->desc = "The total number of GTI memory writes from Render Depth Cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_rcz_memory_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/Depth Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiRingAccesses";
    counter->symbol_name = "GtiRingAccesses";
    counter->desc = "The total number of all GTI accesses to the ring.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_ring_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiSoMemoryWrites";
    counter->symbol_name = "GtiSoMemoryWrites";
    counter->desc = "The total number of GTI memory writes from Stream Output.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_so_memory_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/3D Pipe/Stream Output");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GtiStcMemoryWrites";
    counter->symbol_name = "GtiStcMemoryWrites";
    counter->desc = "The total number of GTI memory writes from Stencil Cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__gti_stc_memory_writes__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GTI/Depth Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_writes__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_writes__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__memory_writes__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__memory_writes__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_writes__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_writes__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_writes__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__memory_writes__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__memory_writes__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_writes__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_writes__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__memory_writes__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__memory_writes__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__memory_writes__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__memory_writes__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__memory_writes__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__memory_writes__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__memory_writes__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /* Debug-build sanity check that the counter array was not overrun. */
    assert(metric_set->n_counters <= 41);
}

/*
 * Counter-registration helpers private to
 * kblgt2_add_compute_extended_metric_set(); both are #undef'd right after it.
 *
 * Each invocation claims the next free slot in metric_set->counters, fills in
 * the descriptive fields and the read/max callbacks, then hands the counter to
 * intel_perf_add_logical_counter() under the category path `group`.
 *
 * The macros rely on the enclosing function's `perf` and `metric_set`
 * variables. `kind` and `units` are the suffixes of the
 * INTEL_PERF_LOGICAL_COUNTER_TYPE_* / INTEL_PERF_LOGICAL_COUNTER_UNIT_*
 * constants. `limit` is NULL where the generator left the maximum undefined.
 */
#define KBLGT2_CE_UINT64_COUNTER(label, symbol, text, kind, units,            \
                                 reader, limit, group)                        \
    do {                                                                      \
        struct intel_perf_logical_counter *slot_ =                            \
            &metric_set->counters[metric_set->n_counters++];                  \
        slot_->metric_set = metric_set;                                       \
        slot_->name = label;                                                  \
        slot_->symbol_name = symbol;                                          \
        slot_->desc = text;                                                   \
        slot_->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_##kind;                 \
        slot_->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;           \
        slot_->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_##units;                \
        slot_->read_uint64 = reader;                                          \
        slot_->max_uint64 = limit;                                            \
        intel_perf_add_logical_counter(perf, slot_, group);                   \
    } while (0)

#define KBLGT2_CE_FLOAT_COUNTER(label, symbol, text, kind, units,             \
                                reader, limit, group)                         \
    do {                                                                      \
        struct intel_perf_logical_counter *slot_ =                            \
            &metric_set->counters[metric_set->n_counters++];                  \
        slot_->metric_set = metric_set;                                       \
        slot_->name = label;                                                  \
        slot_->symbol_name = symbol;                                          \
        slot_->desc = text;                                                   \
        slot_->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_##kind;                 \
        slot_->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;            \
        slot_->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_##units;                \
        slot_->read_float = reader;                                           \
        slot_->max_float = limit;                                             \
        intel_perf_add_logical_counter(perf, slot_, group);                   \
    } while (0)

/*
 * Build the "Compute Metrics Extended set" (symbol "ComputeExtended"),
 * register it with `perf`, and populate its 38 logical counters.
 *
 * The metric set and its counter array are heap-allocated here and handed to
 * `perf` through intel_perf_add_metric_set(); nothing is freed in this
 * function.
 *
 * NOTE(review): the calloc() results are used unchecked, matching the other
 * generated loaders in this file.
 */
static void
kblgt2_add_compute_extended_metric_set(struct intel_perf *perf)
{
    /* Capacity of the counter array; also the bound checked at the end. */
    enum { max_counters = 38 };

    struct intel_perf_metric_set *metric_set = calloc(1, sizeof(*metric_set));

    metric_set->name = "Compute Metrics Extended set";
    metric_set->symbol_name = "ComputeExtended";
    metric_set->hw_config_guid = "4af0400a-81c3-47db-a6b6-deddbd75680e";
    metric_set->counters = calloc(max_counters, sizeof(*metric_set->counters));
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /* Report layout: each offset is derived from the previous one. */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    /* The set is registered before its counters are filled in. */
    kblgt2_compute_extended_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    KBLGT2_CE_UINT64_COUNTER(
        "AVG GPU Core Frequency", "AvgGpuCoreFrequency",
        "Average GPU Core Frequency in the measurement.",
        EVENT, HZ,
        kblgt2__compute_extended__avg_gpu_core_frequency__read,
        kblgt2__compute_extended__avg_gpu_core_frequency__max,
        "GPU");

    KBLGT2_CE_UINT64_COUNTER(
        "CS Threads Dispatched", "CsThreads",
        "The total number of compute shader hardware threads dispatched.",
        EVENT, THREADS,
        kblgt2__compute_extended__cs_threads__read,
        NULL,
        "EU Array/Compute Shader");

    KBLGT2_CE_UINT64_COUNTER(
        "EuA64UntypedReads0", "EuA64UntypedReads0",
        "The subslice 0 EU A64 Untyped Reads subslice 0.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__eu_a64_untyped_reads0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "EuA64UntypedWrites0", "EuA64UntypedWrites0",
        "The subslice 0 EU A64 Untyped Writes subslice 0.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__eu_a64_untyped_writes0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_FLOAT_COUNTER(
        "EU Active", "EuActive",
        "The percentage of time in which the Execution Units were actively processing.",
        RAW, PERCENT,
        kblgt2__compute_extended__eu_active__read,
        percentage_max_callback_float,
        "EU Array");

    KBLGT2_CE_FLOAT_COUNTER(
        "EU AVG IPC Rate", "EuAvgIpcRate",
        "The average rate of IPC calculated for 2 FPU pipelines.",
        EVENT, NUMBER,
        kblgt2__compute_extended__eu_avg_ipc_rate__read,
        kblgt2__compute_extended__eu_avg_ipc_rate__max,
        "EU Array");

    KBLGT2_CE_FLOAT_COUNTER(
        "EU Both FPU Pipes Active", "EuFpuBothActive",
        "The percentage of time in which both EU FPU pipelines were actively processing.",
        RAW, PERCENT,
        kblgt2__compute_extended__eu_fpu_both_active__read,
        percentage_max_callback_float,
        "EU Array/Pipes");

    KBLGT2_CE_FLOAT_COUNTER(
        "EU Send Pipe Active", "EuSendActive",
        "The percentage of time in which EU send pipeline was actively processing.",
        RAW, PERCENT,
        kblgt2__compute_extended__eu_send_active__read,
        percentage_max_callback_float,
        "EU Array/Pipes");

    KBLGT2_CE_FLOAT_COUNTER(
        "EU Stall", "EuStall",
        "The percentage of time in which the Execution Units were stalled.",
        RAW, PERCENT,
        kblgt2__compute_extended__eu_stall__read,
        percentage_max_callback_float,
        "EU Array");

    KBLGT2_CE_FLOAT_COUNTER(
        "EU Thread Occupancy", "EuThreadOccupancy",
        "The percentage of time in which hardware threads occupied EUs.",
        RAW, PERCENT,
        kblgt2__compute_extended__eu_thread_occupancy__read,
        percentage_max_callback_float,
        "EU Array");

    KBLGT2_CE_UINT64_COUNTER(
        "EuTypedAtomics0", "EuTypedAtomics0",
        "The subslice 0 EU Typed Atomics subslice 0.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__eu_typed_atomics0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "EuTypedReads0", "EuTypedReads0",
        "The subslice 0 EU Typed Reads subslice 0.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__eu_typed_reads0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "EuTypedWrites0", "EuTypedWrites0",
        "The subslice 0 EU Typed Writes subslice 0.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__eu_typed_writes0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "EuUntypedAtomics0", "EuUntypedAtomics0",
        "The subslice 0 EU Untyped Atomics subslice 0.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__eu_untyped_atomics0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "EuUntypedReads0", "EuUntypedReads0",
        "The subslice 0 EU Untyped Reads subslice 0.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__eu_untyped_reads0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "EuUntypedWrites0", "EuUntypedWrites0",
        "The subslice 0 EU Untyped Writes subslice 0.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__eu_untyped_writes0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_FLOAT_COUNTER(
        "EU FPU0 Pipe Active", "Fpu0Active",
        "The percentage of time in which EU FPU0 pipeline was actively processing.",
        RAW, PERCENT,
        kblgt2__compute_extended__fpu0_active__read,
        percentage_max_callback_float,
        "EU Array/Pipes");

    KBLGT2_CE_FLOAT_COUNTER(
        "EU FPU1 Pipe Active", "Fpu1Active",
        "The percentage of time in which EU FPU1 pipeline was actively processing.",
        RAW, PERCENT,
        kblgt2__compute_extended__fpu1_active__read,
        percentage_max_callback_float,
        "EU Array/Pipes");

    KBLGT2_CE_UINT64_COUNTER(
        "GPU Core Clocks", "GpuCoreClocks",
        "The total number of GPU core clocks elapsed during the measurement.",
        EVENT, CYCLES,
        kblgt2__compute_extended__gpu_core_clocks__read,
        NULL,
        "GPU");

    KBLGT2_CE_UINT64_COUNTER(
        "GPU Time Elapsed", "GpuTime",
        "Time elapsed on the GPU during the measurement.",
        RAW, NS,
        kblgt2__compute_extended__gpu_time__read,
        NULL,
        "GPU");

    KBLGT2_CE_UINT64_COUNTER(
        "L3 Shader Throughput", "L3ShaderThroughput",
        "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.",
        THROUGHPUT, BYTES,
        kblgt2__compute_extended__l3_shader_throughput__read,
        kblgt2__compute_extended__l3_shader_throughput__max,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "Sampler Texels Misses", "SamplerTexelMisses",
        "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.",
        EVENT, TEXELS,
        kblgt2__compute_extended__sampler_texel_misses__read,
        NULL,
        "Sampler/Sampler Cache");

    KBLGT2_CE_UINT64_COUNTER(
        "Sampler Texels", "SamplerTexels",
        "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.",
        EVENT, TEXELS,
        kblgt2__compute_extended__sampler_texels__read,
        NULL,
        "Sampler/Sampler Input");

    KBLGT2_CE_UINT64_COUNTER(
        "Shader Atomic Memory Accesses", "ShaderAtomics",
        "The total number of shader atomic memory accesses.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__shader_atomics__read,
        NULL,
        "L3/Data Port/Atomics");

    KBLGT2_CE_UINT64_COUNTER(
        "Shader Barrier Messages", "ShaderBarriers",
        "The total number of shader barrier messages.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__shader_barriers__read,
        NULL,
        "EU Array/Barrier");

    KBLGT2_CE_UINT64_COUNTER(
        "Shader Memory Accesses", "ShaderMemoryAccesses",
        "The total number of shader memory accesses to L3.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__shader_memory_accesses__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "SLM Bytes Read", "SlmBytesRead",
        "The total number of GPU memory bytes read from shared local memory.",
        THROUGHPUT, BYTES,
        kblgt2__compute_extended__slm_bytes_read__read,
        kblgt2__compute_extended__slm_bytes_read__max,
        "L3/Data Port/SLM");

    KBLGT2_CE_UINT64_COUNTER(
        "SLM Bytes Written", "SlmBytesWritten",
        "The total number of GPU memory bytes written into shared local memory.",
        THROUGHPUT, BYTES,
        kblgt2__compute_extended__slm_bytes_written__read,
        kblgt2__compute_extended__slm_bytes_written__max,
        "L3/Data Port/SLM");

    KBLGT2_CE_UINT64_COUNTER(
        "Typed Atomics 0", "TypedAtomics0",
        "The subslice 0 typed atomics.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__typed_atomics0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_FLOAT_COUNTER(
        "TypedAtomicsPerCacheLine", "TypedAtomicsPerCacheLine",
        "The ratio of EU typed atomics requests to L3 cache line writes.",
        EVENT, EU_SENDS_TO_L3_CACHE_LINES,
        kblgt2__compute_extended__typed_atomics_per_cache_line__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "Typed Reads 0", "TypedReads0",
        "The subslice 0 typed reads.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__typed_reads0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_FLOAT_COUNTER(
        "TypedReadsPerCacheLine", "TypedReadsPerCacheLine",
        "The ratio of EU typed read requests to L3 cache line reads.",
        EVENT, EU_SENDS_TO_L3_CACHE_LINES,
        kblgt2__compute_extended__typed_reads_per_cache_line__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "Typed Writes 0", "TypedWrites0",
        "The subslice 0 typed writes.",
        EVENT, MESSAGES,
        kblgt2__compute_extended__typed_writes0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_FLOAT_COUNTER(
        "TypedWritesPerCacheLine", "TypedWritesPerCacheLine",
        "The ratio of EU typed write requests to L3 cache line writes.",
        EVENT, EU_SENDS_TO_L3_CACHE_LINES,
        kblgt2__compute_extended__typed_writes_per_cache_line__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "Untyped Reads 0", "UntypedReads0",
        "The subslice 0 untyped reads (including SLM reads).",
        EVENT, MESSAGES,
        kblgt2__compute_extended__untyped_reads0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_FLOAT_COUNTER(
        "UntypedReadsPerCacheLine", "UntypedReadsPerCacheLine",
        "The ratio of EU untyped read requests to L3 cache line reads.",
        EVENT, EU_SENDS_TO_L3_CACHE_LINES,
        kblgt2__compute_extended__untyped_reads_per_cache_line__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_UINT64_COUNTER(
        "Untyped Writes 0", "UntypedWrites0",
        "The subslice 0 untyped writes (including SLM writes).",
        EVENT, MESSAGES,
        kblgt2__compute_extended__untyped_writes0__read,
        NULL,
        "L3/Data Port");

    KBLGT2_CE_FLOAT_COUNTER(
        "UntypedWritesPerCacheLine", "UntypedWritesPerCacheLine",
        "The ratio of EU untyped write requests to L3 cache line writes.",
        EVENT, EU_SENDS_TO_L3_CACHE_LINES,
        kblgt2__compute_extended__untyped_writes_per_cache_line__read,
        NULL,
        "L3/Data Port");

    /* Sanity check: the slots consumed must fit the array allocated above. */
    assert(metric_set->n_counters <= max_counters);
}

#undef KBLGT2_CE_FLOAT_COUNTER
#undef KBLGT2_CE_UINT64_COUNTER

static void
kblgt2_add_compute_l3_cache_metric_set(struct intel_perf *perf)
{
    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    metric_set->name = "Compute Metrics L3 Cache set";
    metric_set->symbol_name = "ComputeL3Cache";
    metric_set->hw_config_guid = "0e22f995-79ca-4f67-83ab-e9d9772488d8";
    metric_set->counters = calloc(54, sizeof(struct intel_perf_logical_counter));
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_compute_l3_cache_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);


    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__compute_l3_cache__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__compute_l3_cache__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_l3_cache__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_l3_cache__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_l3_cache__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU AVG IPC Rate";
    counter->symbol_name = "EuAvgIpcRate";
    counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NUMBER;
    counter->read_float = kblgt2__compute_l3_cache__eu_avg_ipc_rate__read;
    counter->max_float = kblgt2__compute_l3_cache__eu_avg_ipc_rate__max;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU0 Binary Instruction";
    counter->symbol_name = "EuBinaryFpu0Instruction";
    counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_binary_fpu0_instruction__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes/Instructions");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU1 Binary Instruction";
    counter->symbol_name = "EuBinaryFpu1Instruction";
    counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_binary_fpu1_instruction__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes/Instructions");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU0 Hybrid Instruction";
    counter->symbol_name = "EuHybridFpu0Instruction";
    counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes/Instructions");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU1 Hybrid Instruction";
    counter->symbol_name = "EuHybridFpu1Instruction";
    counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes/Instructions");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU0 Move Instruction";
    counter->symbol_name = "EuMoveFpu0Instruction";
    counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_move_fpu0_instruction__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes/Instructions");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU1 Move Instruction";
    counter->symbol_name = "EuMoveFpu1Instruction";
    counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_move_fpu1_instruction__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes/Instructions");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Send Pipe Active";
    counter->symbol_name = "EuSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU0 Ternary Instruction";
    counter->symbol_name = "EuTernaryFpu0Instruction";
    counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_ternary_fpu0_instruction__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes/Instructions");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU1 Ternary Instruction";
    counter->symbol_name = "EuTernaryFpu1Instruction";
    counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__eu_ternary_fpu1_instruction__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes/Instructions");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU0 Pipe Active";
    counter->symbol_name = "Fpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU1 Pipe Active";
    counter->symbol_name = "Fpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_l3_cache__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__compute_l3_cache__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__compute_l3_cache__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_l3_cache__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI L3 Throughput";
    counter->symbol_name = "GtiL3Throughput";
    counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_l3_cache__gti_l3_throughput__read;
    counter->max_uint64 = kblgt2__compute_l3_cache__gti_l3_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI/L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI Read Throughput";
    counter->symbol_name = "GtiReadThroughput";
    counter->desc = "The total number of GPU memory bytes read from GTI.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_l3_cache__gti_read_throughput__read;
    counter->max_uint64 = kblgt2__compute_l3_cache__gti_read_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GTI Write Throughput";
    counter->symbol_name = "GtiWriteThroughput";
    counter->desc = "The total number of GPU memory bytes written to GTI.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_l3_cache__gti_write_throughput__read;
    counter->max_uint64 = kblgt2__compute_l3_cache__gti_write_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "GTI");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_l3_cache__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_l3_cache__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Accesses";
    counter->symbol_name = "L3Accesses";
    counter->desc = "The total number of L3 accesses from all entities.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__compute_l3_cache__l3_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3");

    if (perf->devinfo.slice_mask & 0x01) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "L3 Bank 00 Accesses";
        counter->symbol_name = "L3Bank00Accesses";
        counter->desc = "The total number of accesses to L3 Bank 00.";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
        counter->read_uint64 = kblgt2__compute_l3_cache__l3_bank00_accesses__read;
        counter->max_uint64 = NULL /* undefined */;
        intel_perf_add_logical_counter(perf, counter, "L3");
    }

    if (perf->devinfo.slice_mask & 0x01) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "L3 Bank 00 IC Accesses";
        counter->symbol_name = "L3Bank00IcAccesses";
        counter->desc = "The total number of accesses to L3 Bank 00 from IC cache.";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
        counter->read_uint64 = kblgt2__compute_l3_cache__l3_bank00_ic_accesses__read;
        counter->max_uint64 = NULL /* undefined */;
        intel_perf_add_logical_counter(perf, counter, "L3/IC");
    }

    if (perf->devinfo.slice_mask & 0x01) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "L3 Bank 00 IC Hits";
        counter->symbol_name = "L3Bank00IcHits";
        counter->desc = "The total number of hits in L3 Bank 00 from IC cache.";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
        counter->read_uint64 = kblgt2__compute_l3_cache__l3_bank00_ic_hits__read;
        counter->max_uint64 = NULL /* undefined */;
        intel_perf_add_logical_counter(perf, counter, "L3/IC");
    }

    if (perf->devinfo.slice_mask & 0x01) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "L3 Bank 01 Accesses";
        counter->symbol_name = "L3Bank01Accesses";
        counter->desc = "The total number of accesses to L3 Bank 01.";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
        counter->read_uint64 = kblgt2__compute_l3_cache__l3_bank01_accesses__read;
        counter->max_uint64 = NULL /* undefined */;
        intel_perf_add_logical_counter(perf, counter, "L3");
    }

    if (perf->devinfo.slice_mask & 0x01) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "L3 Bank 02 Accesses";
        counter->symbol_name = "L3Bank02Accesses";
        counter->desc = "The total number of accesses to L3 Bank 02.";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
        counter->read_uint64 = kblgt2__compute_l3_cache__l3_bank02_accesses__read;
        counter->max_uint64 = NULL /* undefined */;
        intel_perf_add_logical_counter(perf, counter, "L3");
    }

    if (perf->devinfo.slice_mask & 0x01) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "L3 Bank 03 Accesses";
        counter->symbol_name = "L3Bank03Accesses";
        counter->desc = "The total number of accesses to L3 Bank 03.";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
        counter->read_uint64 = kblgt2__compute_l3_cache__l3_bank03_accesses__read;
        counter->max_uint64 = NULL /* undefined */;
        intel_perf_add_logical_counter(perf, counter, "L3");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Misses";
    counter->symbol_name = "L3Misses";
    counter->desc = "The total number of L3 misses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__compute_l3_cache__l3_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/TAG");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Sampler Throughput";
    counter->symbol_name = "L3SamplerThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_l3_cache__l3_sampler_throughput__read;
    counter->max_uint64 = kblgt2__compute_l3_cache__l3_sampler_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Sampler");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_l3_cache__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__compute_l3_cache__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Total Throughput";
    counter->symbol_name = "L3TotalThroughput";
    counter->desc = "The total number of GPU memory bytes transferred via L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_l3_cache__l3_total_throughput__read;
    counter->max_uint64 = kblgt2__compute_l3_cache__l3_total_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_l3_cache__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_l3_cache__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_l3_cache__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Accesses";
    counter->symbol_name = "SamplerAccesses";
    counter->desc = "The total number of messages send to samplers.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__compute_l3_cache__sampler_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__compute_l3_cache__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__compute_l3_cache__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_l3_cache__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_l3_cache__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__compute_l3_cache__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__compute_l3_cache__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__compute_l3_cache__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__compute_l3_cache__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_l3_cache__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__compute_l3_cache__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__compute_l3_cache__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__compute_l3_cache__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__compute_l3_cache__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    assert(metric_set->n_counters <= 54);
}

/*
 * Build the "HDCAndSF" metric set for KBL GT2 and register it with @perf.
 *
 * The function allocates a zeroed metric set plus a fixed-capacity array of
 * logical counters, fills in the set's identification and report-layout
 * fields, attaches its registers via kblgt2_hdc_and_sf_add_registers(), adds
 * the set to @perf with intel_perf_add_metric_set(), and then appends the
 * logical counters one by one, registering each with
 * intel_perf_add_logical_counter().
 *
 * Four counters are conditional: "GTRequestQueueFull" is only added when
 * perf->devinfo.query_mode is non-zero, and the three
 * "NonSamplerShader0NAccessStalledOnL3" counters are each gated on one bit
 * (0x1, 0x2, 0x4) of perf->devinfo.subslice_mask.
 *
 * If either allocation fails, nothing is registered and the function returns
 * without touching @perf.
 *
 * NOTE(review): after intel_perf_add_metric_set() the set is presumably owned
 * by @perf -- it is never freed here; confirm against that function.
 */
static void
kblgt2_add_hdc_and_sf_metric_set(struct intel_perf *perf)
{
    /* Capacity of the counters array; must cover every counter added below
     * with all conditional counters enabled. */
    enum { HDC_AND_SF_MAX_COUNTERS = 40 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL)
        return;

    metric_set->counters = calloc(HDC_AND_SF_MAX_COUNTERS, sizeof(*metric_set->counters));
    if (metric_set->counters == NULL) {
        /* Nothing has been handed to perf yet, so the set can simply be dropped. */
        free(metric_set);
        return;
    }

    metric_set->name = "Metric set HDCAndSF";
    metric_set->symbol_name = "HDCAndSF";
    metric_set->hw_config_guid = "fb149d66-fad2-4230-b0d7-4d689b9116d3";
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /* Report layout: each section offset is derived from the previous one. */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_hdc_and_sf_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    /*
     * Logical counters. Each one takes the next free slot of the counters
     * array, is filled in, and is then registered together with a group
     * string (presumably a '/'-separated category path -- confirm against
     * intel_perf_add_logical_counter()).
     */

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__hdc_and_sf__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__hdc_and_sf__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__hdc_and_sf__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__hdc_and_sf__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__hdc_and_sf__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    /* Only exposed when the device info reports query mode. */
    if (perf->devinfo.query_mode) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "SQ is full";
        counter->symbol_name = "GTRequestQueueFull";
        counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__hdc_and_sf__gt_request_queue_full__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__hdc_and_sf__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__hdc_and_sf__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__hdc_and_sf__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__hdc_and_sf__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__hdc_and_sf__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__hdc_and_sf__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__hdc_and_sf__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    /* Per-subslice counters: each is added only if its subslice_mask bit is set. */
    if (perf->devinfo.subslice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "HDC stalled by L3 (s0.ss0)";
        counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
        counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Data Port");
    }

    if (perf->devinfo.subslice_mask & 0x2) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "HDC stalled by L3 (s0.ss1)";
        counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
        counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Data Port");
    }

    if (perf->devinfo.subslice_mask & 0x4) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "HDC stalled by L3 (s0.ss2)";
        counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
        counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Data Port");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__hdc_and_sf__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Polygon Data Ready";
    counter->symbol_name = "PolyDataReady";
    counter->desc = "The percentage of time in which geometry pipeline output is ready";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__poly_data_ready__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU/3D Pipe/Strip-Fans");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Both FPU Active";
    counter->symbol_name = "PsEuBothFpuActive";
    counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__ps_eu_both_fpu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU0 Pipe Active";
    counter->symbol_name = "PsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__ps_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU1 Pipe Active";
    counter->symbol_name = "PsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__ps_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS Send Pipeline Active";
    counter->symbol_name = "PsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__ps_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__hdc_and_sf__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__hdc_and_sf__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__hdc_and_sf__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__hdc_and_sf__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__hdc_and_sf__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__hdc_and_sf__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__hdc_and_sf__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__hdc_and_sf__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__hdc_and_sf__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__hdc_and_sf__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__hdc_and_sf__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__hdc_and_sf__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__hdc_and_sf__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__hdc_and_sf__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU0 Pipe Active";
    counter->symbol_name = "VsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__vs_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU1 Pipe Active";
    counter->symbol_name = "VsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__vs_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Send Pipe Active";
    counter->symbol_name = "VsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__hdc_and_sf__vs_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__hdc_and_sf__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /* Sanity check: the number of counters added must fit the allocation above. */
    assert(metric_set->n_counters <= HDC_AND_SF_MAX_COUNTERS);
}

/*
 * Build the "L3_1" metric set for kblgt2 and register it on @perf.
 *
 * The function allocates one intel_perf_metric_set plus a zeroed array of
 * logical counters, fills in the set's identity (name, symbol name, HW config
 * GUID), its OA report format and report offsets, then hands the set to
 * kblgt2_l3_1_add_registers() and intel_perf_add_metric_set().  Afterwards
 * each logical counter is filled in place inside metric_set->counters and
 * passed to intel_perf_add_logical_counter() together with its group path.
 *
 * Conditional counters:
 *   - "GTRequestQueueFull" is only added when perf->devinfo.query_mode is
 *     non-zero.
 *   - The four "L30Bank*" counters are only added when bit 0 of
 *     perf->devinfo.slice_mask is set.
 *
 * If either allocation fails the function returns without registering
 * anything on @perf (the original code dereferenced the NULL result).
 *
 * NOTE(review): the file header states this file is generated by
 * perf-metricset-codegen.py; any change kept here should be mirrored in the
 * generator, otherwise it will be lost on regeneration.
 *
 * @perf: perf context the metric set and its counters are registered on.
 */
static void
kblgt2_add_l3_1_metric_set(struct intel_perf *perf)
{
    /* Capacity of metric_set->counters.  Exactly this many counters are
     * registered below when every conditional counter is enabled. */
    enum { L3_1_MAX_COUNTERS = 40 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL) {
        return;
    }

    metric_set->name = "Metric set L3_1";
    metric_set->symbol_name = "L3_1";
    metric_set->hw_config_guid = "f889f8cc-4c93-4ac8-b75f-551c0b9b87f7";
    metric_set->counters = calloc(L3_1_MAX_COUNTERS, sizeof(*metric_set->counters));
    if (metric_set->counters == NULL) {
        /* Nothing has been registered on perf yet, so the partially built
         * set can simply be released. */
        free(metric_set);
        return;
    }
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /* Report layout.  The offsets chain from a_offset: b = a + 36, c = b + 8,
     * perfcnt = c + 8.  NOTE(review): presumably these are slot indices into
     * the OA report matching the A32u40_A4u32_B8_C8 format - confirm. */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    /* The set is registered before its counters are populated; the counters
     * below are written into the array the registered set already owns. */
    kblgt2_l3_1_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__l3_1__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__l3_1__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_1__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_1__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_1__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_1__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_1__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_1__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    /* Only exposed when the device info reports query mode. */
    if (perf->devinfo.query_mode) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "SQ is full";
        counter->symbol_name = "GTRequestQueueFull";
        counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__l3_1__gt_request_queue_full__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_1__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__l3_1__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__l3_1__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_1__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_1__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_1__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    /* The four slice0 L3 bank counters below are each gated on bit 0 of the
     * device slice mask. */
    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 L3 Bank0 Active";
        counter->symbol_name = "L30Bank0Active";
        counter->desc = "The percentage of time in which slice0 L3 bank0 is active";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__l3_1__l30_bank0_active__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI/L3");
    }

    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 L3 Bank0 Stalled";
        counter->symbol_name = "L30Bank0Stalled";
        counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__l3_1__l30_bank0_stalled__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI/L3");
    }

    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 L3 Bank1 Active";
        counter->symbol_name = "L30Bank1Active";
        counter->desc = "The percentage of time in which slice0 L3 bank1 is active";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__l3_1__l30_bank1_active__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI/L3");
    }

    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 L3 Bank1 Stalled";
        counter->symbol_name = "L30Bank1Stalled";
        counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__l3_1__l30_bank1_stalled__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI/L3");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__l3_1__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__l3_1__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_1__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Both FPU Active";
    counter->symbol_name = "PsEuBothFpuActive";
    counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_1__ps_eu_both_fpu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU0 Pipe Active";
    counter->symbol_name = "PsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_1__ps_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU1 Pipe Active";
    counter->symbol_name = "PsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_1__ps_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS Send Pipeline Active";
    counter->symbol_name = "PsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_1__ps_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_1__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_1__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__l3_1__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__l3_1__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_1__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_1__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_1__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__l3_1__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__l3_1__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__l3_1__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__l3_1__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__l3_1__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__l3_1__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__l3_1__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU0 Pipe Active";
    counter->symbol_name = "VsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_1__vs_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU1 Pipe Active";
    counter->symbol_name = "VsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_1__vs_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Send Pipe Active";
    counter->symbol_name = "VsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_1__vs_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_1__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /* Sanity check that the registrations above stayed within the allocated
     * array; it runs after the writes, so it only detects an overrun. */
    assert(metric_set->n_counters <= L3_1_MAX_COUNTERS);
}

/*
 * Build the "L3_2" metric set and register it, together with its logical
 * counters, on @perf.
 *
 * The set and its counter array are heap-allocated here. If either allocation
 * fails the function returns without registering anything (and without
 * leaking the partially built set) instead of dereferencing a NULL pointer.
 *
 * Three counters are conditional on the device description:
 *   - "GTRequestQueueFull" is only added when perf->devinfo.query_mode is set;
 *   - "L30Bank2Active" and "L30Bank2Stalled" are only added when bit 0 of
 *     perf->devinfo.slice_mask is set.
 *
 * NOTE(review): this file is generated by perf-metricset-codegen.py; the
 * allocation checks should be mirrored in the generator so they survive a
 * regeneration.
 */
static void
kblgt2_add_l3_2_metric_set(struct intel_perf *perf)
{
    /* Capacity of the counters array; also the bound checked by the final assert. */
    enum { L3_2_MAX_COUNTERS = 38 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL)
        return;

    metric_set->counters = calloc(L3_2_MAX_COUNTERS, sizeof(struct intel_perf_logical_counter));
    if (metric_set->counters == NULL) {
        /* Nothing has been handed to perf yet, so the set can simply be dropped. */
        free(metric_set);
        return;
    }

    metric_set->name = "Metric set L3_2";
    metric_set->symbol_name = "L3_2";
    metric_set->hw_config_guid = "6d344efd-8e9e-42d4-a29e-1011c29f82c2";
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /*
     * Layout of one raw report. The A/B/C strides (36, 8, 8) presumably
     * correspond to the 32 + 4 A, 8 B and 8 C counters named by the OA
     * format above -- TODO confirm against the OA format definition.
     */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_l3_2_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    /*
     * Each stanza below claims the next free slot in metric_set->counters,
     * fills in its description and read/max callbacks, and registers it
     * under a group path.
     */

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__l3_2__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__l3_2__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_2__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_2__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_2__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_2__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_2__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_2__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    /* Only exposed when the device description has query_mode set. */
    if (perf->devinfo.query_mode) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "SQ is full";
        counter->symbol_name = "GTRequestQueueFull";
        counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__l3_2__gt_request_queue_full__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_2__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__l3_2__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__l3_2__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_2__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_2__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_2__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    /* The two slice0 L3 bank2 counters are only added when slice_mask bit 0 is set. */
    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 L3 Bank2 Active";
        counter->symbol_name = "L30Bank2Active";
        counter->desc = "The percentage of time in which slice0 L3 bank2 is active";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__l3_2__l30_bank2_active__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI/L3");
    }

    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 L3 Bank2 Stalled";
        counter->symbol_name = "L30Bank2Stalled";
        counter->desc = "The percentage of time in which slice0 L3 bank2 is stalled";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__l3_2__l30_bank2_stalled__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI/L3");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__l3_2__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__l3_2__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_2__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Both FPU Active";
    counter->symbol_name = "PsEuBothFpuActive";
    counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_2__ps_eu_both_fpu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU0 Pipe Active";
    counter->symbol_name = "PsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_2__ps_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU1 Pipe Active";
    counter->symbol_name = "PsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_2__ps_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS Send Pipeline Active";
    counter->symbol_name = "PsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_2__ps_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_2__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_2__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__l3_2__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__l3_2__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_2__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_2__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_2__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__l3_2__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__l3_2__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__l3_2__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__l3_2__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__l3_2__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__l3_2__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__l3_2__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU0 Pipe Active";
    counter->symbol_name = "VsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_2__vs_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU1 Pipe Active";
    counter->symbol_name = "VsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_2__vs_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Send Pipe Active";
    counter->symbol_name = "VsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_2__vs_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_2__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /* Sanity check that the stanzas above never outgrew the allocation. */
    assert(metric_set->n_counters <= L3_2_MAX_COUNTERS);
}

/*
 * Build the "L3_3" metric set and register it, with its logical counters,
 * on `perf`.
 *
 * The function allocates the metric set and a 38-entry counter array, fills
 * in the report layout fields, calls kblgt2_l3_3_add_registers() and
 * intel_perf_add_metric_set(), and then fills counter slots one by one,
 * handing each to intel_perf_add_logical_counter() together with its group
 * path string.
 *
 * Three counters are conditional:
 *   - GTRequestQueueFull is added only when perf->devinfo.query_mode is set;
 *   - L30Bank3Active and L30Bank3Stalled are added only when bit 0 of
 *     perf->devinfo.slice_mask is set.
 *
 * If either allocation fails the function returns early without registering
 * anything on `perf`.
 */
static void
kblgt2_add_l3_3_metric_set(struct intel_perf *perf)
{
    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (!metric_set)
        return; /* out of memory: nothing has been registered yet */

    metric_set->name = "Metric set L3_3";
    metric_set->symbol_name = "L3_3";
    metric_set->hw_config_guid = "e3b386ae-c195-47d5-af29-8a1afa0ae2bf";
    /* 38 slots: one per counter added below, including the conditional ones. */
    metric_set->counters = calloc(38, sizeof(struct intel_perf_logical_counter));
    if (!metric_set->counters) {
        /* The set is not yet visible to `perf`, so it can simply be dropped. */
        free(metric_set);
        return;
    }
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /*
     * Report layout. The B, C and perfcnt offsets are derived from a_offset
     * by skipping 36, 8 and 8 entries respectively.
     * NOTE(review): 36/8/8 presumably match the A32+A4, B8 and C8 groups of
     * the OA format selected above - confirm against the format definition.
     */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_l3_3_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);


    /* Each stanza below claims the next free slot and fills it in. */
    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__l3_3__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__l3_3__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_3__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_3__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_3__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_3__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_3__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_3__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    /* Only added in query mode; the leading `true &&` is a no-op. */
    if (true &&
        perf->devinfo.query_mode) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "SQ is full";
        counter->symbol_name = "GTRequestQueueFull";
        counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__l3_3__gt_request_queue_full__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_3__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__l3_3__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__l3_3__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_3__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_3__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_3__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    /* The two slice0 L3 bank3 counters require bit 0 of the slice mask. */
    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 L3 Bank3 Active";
        counter->symbol_name = "L30Bank3Active";
        counter->desc = "The percentage of time in which slice0 L3 bank3 is active";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__l3_3__l30_bank3_active__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI/L3");
    }

    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 L3 Bank3 Stalled";
        counter->symbol_name = "L30Bank3Stalled";
        counter->desc = "The percentage of time in which slice0 L3 bank3 is stalled";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__l3_3__l30_bank3_stalled__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI/L3");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__l3_3__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__l3_3__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_3__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Both FPU Active";
    counter->symbol_name = "PsEuBothFpuActive";
    counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_3__ps_eu_both_fpu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU0 Pipe Active";
    counter->symbol_name = "PsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_3__ps_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU1 Pipe Active";
    counter->symbol_name = "PsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_3__ps_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS Send Pipeline Active";
    counter->symbol_name = "PsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_3__ps_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_3__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_3__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__l3_3__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__l3_3__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_3__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_3__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__l3_3__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__l3_3__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__l3_3__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__l3_3__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__l3_3__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__l3_3__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__l3_3__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__l3_3__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU0 Pipe Active";
    counter->symbol_name = "VsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_3__vs_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU1 Pipe Active";
    counter->symbol_name = "VsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_3__vs_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Send Pipe Active";
    counter->symbol_name = "VsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__l3_3__vs_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__l3_3__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /*
     * Sanity check against the 38 slots allocated above. This runs after the
     * slots were written and is compiled out under NDEBUG, so it detects an
     * overrun rather than preventing one.
     */
    assert(metric_set->n_counters <= 38);
}

/*
 * Build the "RasterizerAndPixelBackend" metric set and register it with @perf.
 *
 * The function allocates the metric set together with an array of 41 logical
 * counter slots, fills in the report format, raw size and counter offsets,
 * hands the set to kblgt2_rasterizer_and_pixel_backend_add_registers() and
 * intel_perf_add_metric_set(), and then fills in and registers each logical
 * counter through intel_perf_add_logical_counter().
 *
 * Counters guarded by perf->devinfo.query_mode or by bit 0 of
 * perf->devinfo.slice_mask are only added when the respective condition
 * holds, so the final counter count may be lower than 41.
 *
 * If either allocation fails, nothing is registered and the function returns
 * without touching @perf.
 *
 * NOTE(review): nothing in this function frees metric_set on the success
 * path; presumably @perf keeps referencing it after registration - confirm
 * against intel_perf_add_metric_set().
 */
static void
kblgt2_add_rasterizer_and_pixel_backend_metric_set(struct intel_perf *perf)
{
    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL)
        return;

    metric_set->name = "Metric set RasterizerAndPixelBackend";
    metric_set->symbol_name = "RasterizerAndPixelBackend";
    metric_set->hw_config_guid = "f9954679-a055-4862-9f57-9d66e3ebf81c";
    /* 41 slots: one per counter definition below (see the closing assert). */
    metric_set->counters = calloc(41, sizeof(*metric_set->counters));
    if (metric_set->counters == NULL) {
        /* The set has not been registered yet, so it can simply be dropped. */
        free(metric_set);
        return;
    }
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    /* Each offset is derived from the previous one: a + 36, then b + 8, then c + 8. */
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_rasterizer_and_pixel_backend_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    /*
     * Every counter below follows the same pattern: take the next free slot
     * in metric_set->counters, describe the counter, then register it under
     * a group path with intel_perf_add_logical_counter().
     */

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__rasterizer_and_pixel_backend__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__rasterizer_and_pixel_backend__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    /* Only added when devinfo.query_mode is set. */
    if (perf->devinfo.query_mode) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "SQ is full";
        counter->symbol_name = "GTRequestQueueFull";
        counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__rasterizer_and_pixel_backend__gt_request_queue_full__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__rasterizer_and_pixel_backend__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__rasterizer_and_pixel_backend__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    /* The three "Slice0" counters below are only added when bit 0 of devinfo.slice_mask is set. */
    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 PS Output Available";
        counter->symbol_name = "PSOutput0Available";
        counter->desc = "The percentage of time in which slice0 PS output is available";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__rasterizer_and_pixel_backend__ps_output0_available__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/3D Pipe");
    }

    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 Post-EarlyZ Pixel Data Ready";
        counter->symbol_name = "PixelData0Ready";
        counter->desc = "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__rasterizer_and_pixel_backend__pixel_data0_ready__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Rasterizer/Early Depth Test");
    }

    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 Pixel Values Ready";
        counter->symbol_name = "PixelValues0Ready";
        counter->desc = "The percentage of time in which slice0 pixel values are ready";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__rasterizer_and_pixel_backend__pixel_values0_ready__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/3D Pipe");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Both FPU Active";
    counter->symbol_name = "PsEuBothFpuActive";
    counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU0 Pipe Active";
    counter->symbol_name = "PsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__rasterizer_and_pixel_backend__ps_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU1 Pipe Active";
    counter->symbol_name = "PsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__rasterizer_and_pixel_backend__ps_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS Send Pipeline Active";
    counter->symbol_name = "PsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__rasterizer_and_pixel_backend__ps_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    /* The two "Slice0 Rasterizer" counters are likewise gated on bit 0 of devinfo.slice_mask. */
    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 Rasterizer Input Available";
        counter->symbol_name = "Rasterizer0InputAvailable";
        counter->desc = "The percentage of time in which slice0 rasterizer input is available";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Rasterizer");
    }

    if (perf->devinfo.slice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 Rasterizer Output Ready";
        counter->symbol_name = "Rasterizer0OutputReady";
        counter->desc = "The percentage of time in which slice0 rasterizer output is ready";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Rasterizer");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__rasterizer_and_pixel_backend__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__rasterizer_and_pixel_backend__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU0 Pipe Active";
    counter->symbol_name = "VsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__rasterizer_and_pixel_backend__vs_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU1 Pipe Active";
    counter->symbol_name = "VsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__rasterizer_and_pixel_backend__vs_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Send Pipe Active";
    counter->symbol_name = "VsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__rasterizer_and_pixel_backend__vs_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__rasterizer_and_pixel_backend__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /* Sanity check: the number of counters added must fit the 41 slots allocated above. */
    assert(metric_set->n_counters <= 41);
}

/*
 * Build the "Sampler" metric set and register it, together with its logical
 * counters, on @perf.
 *
 * The function allocates a metric set plus room for 42 logical counters,
 * fills in the OA report layout, adds the register configuration through
 * kblgt2_sampler_add_registers() and hands the set to
 * intel_perf_add_metric_set().  Each counter is then appended to the set and
 * registered under a group path with intel_perf_add_logical_counter().
 *
 * Some counters are only added when the device information in @perf allows
 * it (query_mode, individual subslice_mask bits), so the final n_counters may
 * be lower than 42.
 *
 * If either allocation fails the function returns without registering
 * anything; nothing is leaked in that case.
 *
 * NOTE(review): the metric set is never freed here once registered;
 * presumably @perf owns it after intel_perf_add_metric_set() - confirm.
 */
static void
kblgt2_add_sampler_metric_set(struct intel_perf *perf)
{
    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (!metric_set)
        return;

    metric_set->name = "Metric set Sampler";
    metric_set->symbol_name = "Sampler";
    metric_set->hw_config_guid = "4ed5e27e-fd1a-4f11-ad8f-9374e128c697";
    metric_set->counters = calloc(42, sizeof(*metric_set->counters));
    if (!metric_set->counters) {
        /* Not registered with perf yet, so it is still ours to release. */
        free(metric_set);
        return;
    }
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /*
     * Report layout: the A, B, C and perfcnt sections are placed back to
     * back, each offset derived from the previous one (36, 8 and 8 entries
     * apart respectively).
     */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_sampler_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);


    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__sampler__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__sampler__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__sampler__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__sampler__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__sampler__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__sampler__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__sampler__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__sampler__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    /* Only exposed when the device info reports query_mode. */
    if (perf->devinfo.query_mode) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "SQ is full";
        counter->symbol_name = "GTRequestQueueFull";
        counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__sampler__gt_request_queue_full__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__sampler__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__sampler__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__sampler__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__sampler__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__sampler__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__sampler__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__sampler__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__sampler__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__sampler__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Both FPU Active";
    counter->symbol_name = "PsEuBothFpuActive";
    counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__sampler__ps_eu_both_fpu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU0 Pipe Active";
    counter->symbol_name = "PsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__sampler__ps_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU1 Pipe Active";
    counter->symbol_name = "PsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__sampler__ps_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS Send Pipeline Active";
    counter->symbol_name = "PsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__sampler__ps_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__sampler__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__sampler__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    /*
     * Per-subslice sampler counters: each pair below is added only when the
     * matching bit (0x1, 0x2, 0x4) is set in perf->devinfo.subslice_mask.
     */
    if (perf->devinfo.subslice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 Subslice0 Input Available";
        counter->symbol_name = "Sampler00InputAvailable";
        counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__sampler__sampler00_input_available__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Sampler");
    }

    if (perf->devinfo.subslice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 Subslice0 Sampler Output Ready";
        counter->symbol_name = "Sampler00OutputReady";
        counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__sampler__sampler00_output_ready__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Sampler");
    }

    if (perf->devinfo.subslice_mask & 0x2) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 Subslice1 Input Available";
        counter->symbol_name = "Sampler01InputAvailable";
        counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__sampler__sampler01_input_available__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Sampler");
    }

    if (perf->devinfo.subslice_mask & 0x2) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 Subslice1 Sampler Output Ready";
        counter->symbol_name = "Sampler01OutputReady";
        counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__sampler__sampler01_output_ready__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Sampler");
    }

    if (perf->devinfo.subslice_mask & 0x4) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 Subslice2 Input Available";
        counter->symbol_name = "Sampler02InputAvailable";
        counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__sampler__sampler02_input_available__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Sampler");
    }

    if (perf->devinfo.subslice_mask & 0x4) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Slice0 Subslice2 Sampler Output Ready";
        counter->symbol_name = "Sampler02OutputReady";
        counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__sampler__sampler02_output_ready__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Sampler");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__sampler__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__sampler__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__sampler__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__sampler__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__sampler__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__sampler__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__sampler__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__sampler__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__sampler__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__sampler__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__sampler__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__sampler__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU0 Pipe Active";
    counter->symbol_name = "VsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__sampler__vs_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU1 Pipe Active";
    counter->symbol_name = "VsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__sampler__vs_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Send Pipe Active";
    counter->symbol_name = "VsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__sampler__vs_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__sampler__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /*
     * Sanity check that the counters appended above fit in the 42 slots
     * allocated for this set (the check is compiled out under NDEBUG).
     */
    assert(metric_set->n_counters <= 42);
}

/*
 * Build the "TDL_1" metric set for kblgt2 and register it with @perf.
 *
 * The function allocates the metric set and its counter array, fills in the
 * report format, raw size and the gpu_time/gpu_clock/a/b/c/perfcnt offsets,
 * attaches the register configuration through kblgt2_tdl_1_add_registers()
 * and registers the set through intel_perf_add_metric_set(). It then appends
 * the logical counters one by one; each counter is handed to
 * intel_perf_add_logical_counter() together with its group string.
 *
 * Some counters are conditional:
 *  - "GTRequestQueueFull" is only added when perf->devinfo.query_mode is set;
 *  - the per-subslice "...ReadyForDispatch" counters are only added when the
 *    matching bit of perf->devinfo.subslice_mask is set.
 *
 * If either allocation fails the function returns without registering
 * anything, instead of dereferencing a NULL pointer.
 *
 * NOTE: this file is generated by perf-metricset-codegen.py; the allocation
 * checks below should be mirrored in the generator so they survive a
 * regeneration.
 */
static void
kblgt2_add_tdl_1_metric_set(struct intel_perf *perf)
{
    /* Capacity of the counter array; must cover every counter added below,
     * including all the conditional ones. */
    enum { TDL_1_MAX_COUNTERS = 42 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (!metric_set)
        return;

    metric_set->counters = calloc(TDL_1_MAX_COUNTERS,
                                  sizeof(struct intel_perf_logical_counter));
    if (!metric_set->counters) {
        /* Nothing has been registered yet, so the set can simply be dropped. */
        free(metric_set);
        return;
    }

    metric_set->name = "Metric set TDL_1";
    metric_set->symbol_name = "TDL_1";
    metric_set->hw_config_guid = "9eaf384d-8f53-41b8-a71d-108308780fbc";
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /* Each offset is derived from the previous one: a, then b, then c,
     * then perfcnt. */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_tdl_1_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    /*
     * Every counter below follows the same pattern: take the next free slot
     * of metric_set->counters, describe the counter, then pass it to
     * intel_perf_add_logical_counter() with its group string.
     */

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__tdl_1__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__tdl_1__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_1__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_1__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_1__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_1__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_1__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_1__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    /* Only added when the device info reports query mode. */
    if (perf->devinfo.query_mode) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "SQ is full";
        counter->symbol_name = "GTRequestQueueFull";
        counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_1__gt_request_queue_full__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_1__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__tdl_1__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__tdl_1__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_1__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_1__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_1__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__tdl_1__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__tdl_1__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    /* Non-PS thread dispatcher counters, one per subslice_mask bit (0x1,
     * 0x2, 0x4). */
    if (perf->devinfo.subslice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
        counter->symbol_name = "NonPSThread00ReadyForDispatch";
        counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_1__non_ps_thread00_ready_for_dispatch__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    if (perf->devinfo.subslice_mask & 0x2) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
        counter->symbol_name = "NonPSThread01ReadyForDispatch";
        counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_1__non_ps_thread01_ready_for_dispatch__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    if (perf->devinfo.subslice_mask & 0x4) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
        counter->symbol_name = "NonPSThread02ReadyForDispatch";
        counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_1__non_ps_thread02_ready_for_dispatch__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    /* PS thread dispatcher counters, gated by the same subslice_mask bits. */
    if (perf->devinfo.subslice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
        counter->symbol_name = "PSThread00ReadyForDispatch";
        counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_1__ps_thread00_ready_for_dispatch__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    if (perf->devinfo.subslice_mask & 0x2) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
        counter->symbol_name = "PSThread01ReadyForDispatch";
        counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_1__ps_thread01_ready_for_dispatch__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    if (perf->devinfo.subslice_mask & 0x4) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
        counter->symbol_name = "PSThread02ReadyForDispatch";
        counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_1__ps_thread02_ready_for_dispatch__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_1__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Both FPU Active";
    counter->symbol_name = "PsEuBothFpuActive";
    counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_1__ps_eu_both_fpu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU0 Pipe Active";
    counter->symbol_name = "PsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_1__ps_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU1 Pipe Active";
    counter->symbol_name = "PsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_1__ps_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS Send Pipeline Active";
    counter->symbol_name = "PsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_1__ps_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_1__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_1__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__tdl_1__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__tdl_1__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_1__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_1__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_1__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__tdl_1__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__tdl_1__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__tdl_1__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__tdl_1__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__tdl_1__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__tdl_1__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__tdl_1__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU0 Pipe Active";
    counter->symbol_name = "VsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_1__vs_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU1 Pipe Active";
    counter->symbol_name = "VsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_1__vs_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Send Pipe Active";
    counter->symbol_name = "VsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_1__vs_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_1__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /* Sanity check that the counters added above fit the allocated array. */
    assert(metric_set->n_counters <= TDL_1_MAX_COUNTERS);
}

/*
 * Builds the "TDL_2" metric set for kblgt2 and registers it with @perf.
 *
 * Steps, in order:
 *   1. allocate the metric set and a zero-initialised array of
 *      TDL_2_MAX_COUNTERS logical counter slots;
 *   2. fill in the set's identification, perf_oa_format, raw size and the
 *      gpu_time/gpu_clock/a/b/c/perfcnt offsets;
 *   3. call kblgt2_tdl_2_add_registers() and intel_perf_add_metric_set();
 *   4. fill the counter slots one after another, handing each one to
 *      intel_perf_add_logical_counter() together with a category string.
 *
 * Seven counters are conditional (one on perf->devinfo.query_mode, six on
 * bits of perf->devinfo.subslice_mask), so n_counters may end below the
 * capacity; the closing assert only checks that it never exceeds it.
 *
 * If either allocation fails, nothing is registered and the function
 * returns without touching @perf.
 *
 * NOTE(review): the file header says this file is generated by
 * perf-metricset-codegen.py; a lasting fix presumably belongs in that
 * generator as well -- confirm.
 */
static void
kblgt2_add_tdl_2_metric_set(struct intel_perf *perf)
{
    /* Single source of truth for the counter array size and the final check. */
    enum { TDL_2_MAX_COUNTERS = 42 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counters;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL)
        return;

    counters = calloc(TDL_2_MAX_COUNTERS, sizeof(*counters));
    if (counters == NULL) {
        /* The set has not been handed to perf yet, so it is still ours to free. */
        free(metric_set);
        return;
    }

    metric_set->name = "Metric set TDL_2";
    metric_set->symbol_name = "TDL_2";
    metric_set->hw_config_guid = "a00cbdf2-eabd-4240-9a89-86e2ac1ca1e4";
    metric_set->counters = counters;
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    /* Each following section starts right after the previous one. */
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_tdl_2_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);


    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__tdl_2__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__tdl_2__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_2__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_2__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Depth Test Fails";
    counter->symbol_name = "EarlyDepthTestFails";
    counter->desc = "The total number of pixels dropped on early depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_2__early_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Early Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_2__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_2__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_2__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    /* Added only when perf->devinfo.query_mode is set. */
    if (perf->devinfo.query_mode) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "SQ is full";
        counter->symbol_name = "GTRequestQueueFull";
        counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries)";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_2__gt_request_queue_full__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GTI");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_2__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__tdl_2__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__tdl_2__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_2__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Early Hi-Depth Test Fails";
    counter->symbol_name = "HiDepthTestFails";
    counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_2__hi_depth_test_fails__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer/Hi-Depth Test");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_2__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "L3 Shader Throughput";
    counter->symbol_name = "L3ShaderThroughput";
    counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__tdl_2__l3_shader_throughput__read;
    counter->max_uint64 = kblgt2__tdl_2__l3_shader_throughput__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Pixels Failing Tests";
    counter->symbol_name = "PixelsFailingPostPsTests";
    counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_2__pixels_failing_post_ps_tests__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Both FPU Active";
    counter->symbol_name = "PsEuBothFpuActive";
    counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_2__ps_eu_both_fpu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU0 Pipe Active";
    counter->symbol_name = "PsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_2__ps_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU1 Pipe Active";
    counter->symbol_name = "PsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_2__ps_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS Send Pipeline Active";
    counter->symbol_name = "PsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_2__ps_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_2__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Rasterized Pixels";
    counter->symbol_name = "RasterizedPixels";
    counter->desc = "The total number of rasterized pixels.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_2__rasterized_pixels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Rasterizer");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels Misses";
    counter->symbol_name = "SamplerTexelMisses";
    counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__tdl_2__sampler_texel_misses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Cache");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Sampler Texels";
    counter->symbol_name = "SamplerTexels";
    counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS;
    counter->read_uint64 = kblgt2__tdl_2__sampler_texels__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "Sampler/Sampler Input");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Blended";
    counter->symbol_name = "SamplesBlended";
    counter->desc = "The total number of blended samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_2__samples_blended__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Killed in FS";
    counter->symbol_name = "SamplesKilledInPs";
    counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_2__samples_killed_in_ps__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Samples Written";
    counter->symbol_name = "SamplesWritten";
    counter->desc = "The total number of samples or pixels written to all render targets.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS;
    counter->read_uint64 = kblgt2__tdl_2__samples_written__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "3D Pipe/Output Merger");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Atomic Memory Accesses";
    counter->symbol_name = "ShaderAtomics";
    counter->desc = "The total number of shader atomic memory accesses.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__tdl_2__shader_atomics__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/Atomics");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Barrier Messages";
    counter->symbol_name = "ShaderBarriers";
    counter->desc = "The total number of shader barrier messages.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__tdl_2__shader_barriers__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Barrier");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Shader Memory Accesses";
    counter->symbol_name = "ShaderMemoryAccesses";
    counter->desc = "The total number of shader memory accesses to L3.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES;
    counter->read_uint64 = kblgt2__tdl_2__shader_memory_accesses__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Read";
    counter->symbol_name = "SlmBytesRead";
    counter->desc = "The total number of GPU memory bytes read from shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__tdl_2__slm_bytes_read__read;
    counter->max_uint64 = kblgt2__tdl_2__slm_bytes_read__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "SLM Bytes Written";
    counter->symbol_name = "SlmBytesWritten";
    counter->desc = "The total number of GPU memory bytes written into shared local memory.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES;
    counter->read_uint64 = kblgt2__tdl_2__slm_bytes_written__read;
    counter->max_uint64 = kblgt2__tdl_2__slm_bytes_written__max;
    intel_perf_add_logical_counter(perf, counter, "L3/Data Port/SLM");

    /*
     * Thread-header counters: each pair below is added only when the
     * matching bit (0x1, 0x2, 0x4) of perf->devinfo.subslice_mask is set.
     */
    if (perf->devinfo.subslice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Thread Header Ready on Slice0 Subslice0 Port 0";
        counter->symbol_name = "ThreadHeader00ReadyPort0";
        counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_2__thread_header00_ready_port0__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    if (perf->devinfo.subslice_mask & 0x1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Thread Header Ready on Slice0 Subslice0 Port 1";
        counter->symbol_name = "ThreadHeader00ReadyPort1";
        counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_2__thread_header00_ready_port1__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    if (perf->devinfo.subslice_mask & 0x2) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Thread Header Ready on Slice0 Subslice1 Port 0";
        counter->symbol_name = "ThreadHeader01ReadyPort0";
        counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_2__thread_header01_ready_port0__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    if (perf->devinfo.subslice_mask & 0x2) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Thread Header Ready on Slice0 Subslice1 Port 1";
        counter->symbol_name = "ThreadHeader01ReadyPort1";
        counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_2__thread_header01_ready_port1__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    if (perf->devinfo.subslice_mask & 0x4) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Thread Header Ready on Slice0 Subslice2 Port 0";
        counter->symbol_name = "ThreadHeader02ReadyPort0";
        counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_2__thread_header02_ready_port0__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    if (perf->devinfo.subslice_mask & 0x4) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "Thread Header Ready on Slice0 Subslice2 Port 1";
        counter->symbol_name = "ThreadHeader02ReadyPort1";
        counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__tdl_2__thread_header02_ready_port1__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Thread Dispatcher");
    }

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU0 Pipe Active";
    counter->symbol_name = "VsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_2__vs_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU1 Pipe Active";
    counter->symbol_name = "VsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_2__vs_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Send Pipe Active";
    counter->symbol_name = "VsSendActive";
    counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__tdl_2__vs_send_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__tdl_2__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    /* Sanity check: the slots used must not exceed the allocated capacity. */
    assert(metric_set->n_counters <= TDL_2_MAX_COUNTERS);
}

/*
 * Builds the "ComputeExtra" metric set for kblgt2 and registers it with
 * @perf.
 *
 * Allocates the metric set and a zero-initialised array of
 * COMPUTE_EXTRA_MAX_COUNTERS logical counter slots, fills in the set's
 * identification, perf_oa_format, raw size and offsets, calls
 * kblgt2_compute_extra_add_registers() and intel_perf_add_metric_set(), and
 * then fills the five counters in order, handing each one to
 * intel_perf_add_logical_counter() together with a category string.
 *
 * If either allocation fails, nothing is registered and the function
 * returns without touching @perf.
 *
 * NOTE(review): the file header says this file is generated by
 * perf-metricset-codegen.py; a lasting fix presumably belongs in that
 * generator as well -- confirm.
 */
static void
kblgt2_add_compute_extra_metric_set(struct intel_perf *perf)
{
    /* Single source of truth for the counter array size and the final check. */
    enum { COMPUTE_EXTRA_MAX_COUNTERS = 5 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counters;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL)
        return;

    counters = calloc(COMPUTE_EXTRA_MAX_COUNTERS, sizeof(*counters));
    if (counters == NULL) {
        /* The set has not been handed to perf yet, so it is still ours to free. */
        free(metric_set);
        return;
    }

    metric_set->name = "Compute Metrics Extra set";
    metric_set->symbol_name = "ComputeExtra";
    metric_set->hw_config_guid = "aa7a3fb9-22fb-43ff-a32d-0ab6c13bbd16";
    metric_set->counters = counters;
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    /* Each following section starts right after the previous one. */
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_compute_extra_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);


    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__compute_extra__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__compute_extra__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU1 Pipe Active";
    counter->symbol_name = "Fpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_extra__fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU1 Pipe Active including Ext Math";
    counter->symbol_name = "Fpu1ActiveAdjusted";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__compute_extra__fpu1_active_adjusted__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__compute_extra__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__compute_extra__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    /* Sanity check: the slots used must not exceed the allocated capacity. */
    assert(metric_set->n_counters <= COMPUTE_EXTRA_MAX_COUNTERS);
}

/*
 * Builds the "VMEPipe" metric set and adds it, with its logical counters,
 * to @perf.
 *
 * The metric set and its counter array are heap-allocated here. Once the set
 * has been passed to intel_perf_add_metric_set() this function never frees
 * it. If either allocation fails, the function returns early without adding
 * anything to @perf.
 */
static void
kblgt2_add_vme_pipe_metric_set(struct intel_perf *perf)
{
    /* Capacity of the counter array; shared by the allocation and the final assert. */
    enum { KBLGT2_VME_PIPE_MAX_COUNTERS = 10 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL) {
        return;
    }

    metric_set->name = "Media Vme Pipe metrics set";
    metric_set->symbol_name = "VMEPipe";
    metric_set->hw_config_guid = "398a4268-ef6f-4ffc-b55f-3c7b5363ce61";
    metric_set->counters = calloc(KBLGT2_VME_PIPE_MAX_COUNTERS,
                                  sizeof(struct intel_perf_logical_counter));
    if (metric_set->counters == NULL) {
        /* Not yet handed to perf, so it is still ours to release. */
        free(metric_set);
        return;
    }
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /* Raw report size and the positions of each counter group within it. */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_vme_pipe_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    /*
     * Each counter takes the next free slot of the zero-filled array,
     * bumping n_counters as it goes.
     */
    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__vme_pipe__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__vme_pipe__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__vme_pipe__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__vme_pipe__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Both FPU Pipes Active";
    counter->symbol_name = "EuFpuBothActive";
    counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__vme_pipe__eu_fpu_both_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__vme_pipe__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Thread Occupancy";
    counter->symbol_name = "EuThreadOccupancy";
    counter->desc = "The percentage of time in which hardware threads occupied EUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__vme_pipe__eu_thread_occupancy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__vme_pipe__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__vme_pipe__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__vme_pipe__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VME Busy";
    counter->symbol_name = "VMEBusy";
    counter->desc = "The percentage of time in which VME (IME or CRE) was actively processing data.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__vme_pipe__vme_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "VME Pipe");

    /* Sanity check: the counters added above must fit the allocated array. */
    assert(metric_set->n_counters <= KBLGT2_VME_PIPE_MAX_COUNTERS);
}

/*
 * Builds the "GpuBusyness" metric set and adds it, with its logical
 * counters, to @perf.
 *
 * The metric set and its counter array are heap-allocated here. Once the set
 * has been passed to intel_perf_add_metric_set() this function never frees
 * it. If either allocation fails, the function returns early without adding
 * anything to @perf.
 */
static void
kblgt2_add_gpu_busyness_metric_set(struct intel_perf *perf)
{
    /* Capacity of the counter array; shared by the allocation and the final assert. */
    enum { KBLGT2_GPU_BUSYNESS_MAX_COUNTERS = 8 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL) {
        return;
    }

    metric_set->name = "Gpu Rings Busyness";
    metric_set->symbol_name = "GpuBusyness";
    metric_set->hw_config_guid = "6c66fe6e-2988-454a-bfae-7fca3bbcbec2";
    metric_set->counters = calloc(KBLGT2_GPU_BUSYNESS_MAX_COUNTERS,
                                  sizeof(struct intel_perf_logical_counter));
    if (metric_set->counters == NULL) {
        /* Not yet handed to perf, so it is still ours to release. */
        free(metric_set);
        return;
    }
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /* Raw report size and the positions of each counter group within it. */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_gpu_busyness_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    /*
     * Each counter takes the next free slot of the zero-filled array,
     * bumping n_counters as it goes.
     */
    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AnyRingBusy";
    counter->symbol_name = "AnyRingBusy";
    counter->desc = "The percentage of time when any command streamer was busy.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__gpu_busyness__any_ring_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__gpu_busyness__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__gpu_busyness__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Blitter Ring Busy";
    counter->symbol_name = "BlitterBusy";
    counter->desc = "The percentage of time when blitter command streamer was busy.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__gpu_busyness__blitter_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__gpu_busyness__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__gpu_busyness__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Render Ring Busy";
    counter->symbol_name = "RenderBusy";
    counter->desc = "The percentage of time when render command streamer was busy.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__gpu_busyness__render_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Vdbox0 Ring Busy";
    counter->symbol_name = "Vdbox0Busy";
    counter->desc = "The percentage of time when Vdbox0 command streamer was busy.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__gpu_busyness__vdbox0_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "Vebox Ring Busy";
    counter->symbol_name = "VeboxBusy";
    counter->desc = "The percentage of time when vebox command streamer was busy.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__gpu_busyness__vebox_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    /* Sanity check: the counters added above must fit the allocated array. */
    assert(metric_set->n_counters <= KBLGT2_GPU_BUSYNESS_MAX_COUNTERS);
}

/*
 * Builds the "TestOa" metric set and adds it, with its logical counters,
 * to @perf.
 *
 * The metric set and its counter array are heap-allocated here. Once the set
 * has been passed to intel_perf_add_metric_set() this function never frees
 * it. If either allocation fails, the function returns early without adding
 * anything to @perf.
 */
static void
kblgt2_add_test_oa_metric_set(struct intel_perf *perf)
{
    /* Capacity of the counter array; shared by the allocation and the final assert. */
    enum { KBLGT2_TEST_OA_MAX_COUNTERS = 12 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL) {
        return;
    }

    metric_set->name = "MDAPI testing set";
    metric_set->symbol_name = "TestOa";
    metric_set->hw_config_guid = "cd4b577b-073a-423f-a948-e198dd818c71";
    metric_set->counters = calloc(KBLGT2_TEST_OA_MAX_COUNTERS,
                                  sizeof(struct intel_perf_logical_counter));
    if (metric_set->counters == NULL) {
        /* Not yet handed to perf, so it is still ours to release. */
        free(metric_set);
        return;
    }
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /* Raw report size and the positions of each counter group within it. */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_test_oa_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    /*
     * Each counter takes the next free slot of the zero-filled array,
     * bumping n_counters as it goes.
     */
    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__test_oa__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__test_oa__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    /* Hardware test counters 0..8. */
    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "TestCounter0";
    counter->symbol_name = "Counter0";
    counter->desc = "HW test counter 0. Factor: 0.0";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_EVENTS;
    counter->read_uint64 = kblgt2__test_oa__counter0__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "TestCounter1";
    counter->symbol_name = "Counter1";
    counter->desc = "HW test counter 1. Factor: 1.0";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_EVENTS;
    counter->read_uint64 = kblgt2__test_oa__counter1__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "TestCounter2";
    counter->symbol_name = "Counter2";
    counter->desc = "HW test counter 2. Factor: 1.0";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_EVENTS;
    counter->read_uint64 = kblgt2__test_oa__counter2__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "TestCounter3";
    counter->symbol_name = "Counter3";
    counter->desc = "HW test counter 3. Factor: 0.5";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_EVENTS;
    counter->read_uint64 = kblgt2__test_oa__counter3__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "TestCounter4";
    counter->symbol_name = "Counter4";
    counter->desc = "HW test counter 4. Factor: 0.333";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_EVENTS;
    counter->read_uint64 = kblgt2__test_oa__counter4__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "TestCounter5";
    counter->symbol_name = "Counter5";
    counter->desc = "HW test counter 5. Factor: 0.333";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_EVENTS;
    counter->read_uint64 = kblgt2__test_oa__counter5__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "TestCounter6";
    counter->symbol_name = "Counter6";
    counter->desc = "HW test counter 6. Factor: 0.166";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_EVENTS;
    counter->read_uint64 = kblgt2__test_oa__counter6__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "TestCounter7";
    counter->symbol_name = "Counter7";
    counter->desc = "HW test counter 7. Factor: 0.666";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_EVENTS;
    counter->read_uint64 = kblgt2__test_oa__counter7__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "TestCounter8";
    counter->symbol_name = "Counter8";
    counter->desc = "HW test counter 8. Should be equal to 1.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_EVENTS;
    counter->read_uint64 = kblgt2__test_oa__counter8__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__test_oa__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__test_oa__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    /* Sanity check: the counters added above must fit the allocated array. */
    assert(metric_set->n_counters <= KBLGT2_TEST_OA_MAX_COUNTERS);
}

/*
 * Builds the "PMA_Stall" metric set and adds it, with its logical counters,
 * to @perf.
 *
 * Three counters are always added; the "StcPMAStall" counter is added only
 * when bit 0 of perf->devinfo.slice_mask is set.
 *
 * The metric set and its counter array are heap-allocated here. Once the set
 * has been passed to intel_perf_add_metric_set() this function never frees
 * it. If either allocation fails, the function returns early without adding
 * anything to @perf.
 */
static void
kblgt2_add_pma__stall_metric_set(struct intel_perf *perf)
{
    /* Capacity of the counter array; shared by the allocation and the final assert. */
    enum { KBLGT2_PMA_STALL_MAX_COUNTERS = 4 };

    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    if (metric_set == NULL) {
        return;
    }

    metric_set->name = "Metric set PMA Stall";
    metric_set->symbol_name = "PMA_Stall";
    metric_set->hw_config_guid = "b49aa434-4958-4d98-9e6f-443ff27ca74d";
    metric_set->counters = calloc(KBLGT2_PMA_STALL_MAX_COUNTERS,
                                  sizeof(struct intel_perf_logical_counter));
    if (metric_set->counters == NULL) {
        /* Not yet handed to perf, so it is still ours to release. */
        free(metric_set);
        return;
    }
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    /* Raw report size and the positions of each counter group within it. */
    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_pma__stall_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);

    /*
     * Each counter takes the next free slot of the zero-filled array,
     * bumping n_counters as it goes.
     */
    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__pma__stall__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__pma__stall__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__pma__stall__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__pma__stall__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    /* Gated on bit 0 of the device slice mask (presumably "slice 0 present" - confirm). */
    if (perf->devinfo.slice_mask & 1) {
        counter = &metric_set->counters[metric_set->n_counters++];
        counter->metric_set = metric_set;
        counter->name = "STC PMA stall";
        counter->symbol_name = "StcPMAStall";
        counter->desc = "Percentage of time when stencil cache line and an overlapping pixel are causing stalls";
        counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
        counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
        counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
        counter->read_float = kblgt2__pma__stall__stc_pma_stall__read;
        counter->max_float = percentage_max_callback_float;
        intel_perf_add_logical_counter(perf, counter, "GPU/Stencil Cache");
    }

    /* Sanity check: the counters added above must fit the allocated array. */
    assert(metric_set->n_counters <= KBLGT2_PMA_STALL_MAX_COUNTERS);
}

static void
kblgt2_add_async_compute_metric_set(struct intel_perf *perf)
{
    struct intel_perf_metric_set *metric_set;
    struct intel_perf_logical_counter *counter;

    metric_set = calloc(1, sizeof(*metric_set));
    metric_set->name = "AsyncCompute";
    metric_set->symbol_name = "AsyncCompute";
    metric_set->hw_config_guid = "4032137b-8dcc-4f3f-912d-c0bb14a123bc";
    metric_set->counters = calloc(21, sizeof(struct intel_perf_logical_counter));
    metric_set->n_counters = 0;
    metric_set->perf_oa_metrics_set = 0; // determined at runtime
    metric_set->perf_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

    metric_set->perf_raw_size = 256;
    metric_set->gpu_time_offset = 0;
    metric_set->gpu_clock_offset = 1;
    metric_set->a_offset = 2;
    metric_set->b_offset = metric_set->a_offset + 36;
    metric_set->c_offset = metric_set->b_offset + 8;
    metric_set->perfcnt_offset = metric_set->c_offset + 8;

    kblgt2_async_compute_add_registers(perf, metric_set);
    intel_perf_add_metric_set(perf, metric_set);


    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "AVG GPU Core Frequency";
    counter->symbol_name = "AvgGpuCoreFrequency";
    counter->desc = "Average GPU Core Frequency in the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ;
    counter->read_uint64 = kblgt2__async_compute__avg_gpu_core_frequency__read;
    counter->max_uint64 = kblgt2__async_compute__avg_gpu_core_frequency__max;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS FPU0 Pipe Active";
    counter->symbol_name = "CsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a compute shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__cs_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS FPU1 Pipe Active";
    counter->symbol_name = "CsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__cs_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "CS Threads Dispatched";
    counter->symbol_name = "CsThreads";
    counter->desc = "The total number of compute shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__async_compute__cs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Compute Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "DS Threads Dispatched";
    counter->symbol_name = "DsThreads";
    counter->desc = "The total number of domain shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__async_compute__ds_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Domain Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Active";
    counter->symbol_name = "EuActive";
    counter->desc = "The percentage of time in which the Execution Units were actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__eu_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Stall";
    counter->symbol_name = "EuStall";
    counter->desc = "The percentage of time in which the Execution Units were stalled.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__eu_stall__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU Thread Occupancy";
    counter->symbol_name = "EuThreadOccupancy";
    counter->desc = "The percentage of time in which hardware threads occupied EUs.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__eu_thread_occupancy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU0 Pipe Active";
    counter->symbol_name = "Fpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "EU FPU1 Pipe Active";
    counter->symbol_name = "Fpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pipes");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Busy";
    counter->symbol_name = "GpuBusy";
    counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__gpu_busy__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Core Clocks";
    counter->symbol_name = "GpuCoreClocks";
    counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES;
    counter->read_uint64 = kblgt2__async_compute__gpu_core_clocks__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GPU Time Elapsed";
    counter->symbol_name = "GpuTime";
    counter->desc = "Time elapsed on the GPU during the measurement.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_NS;
    counter->read_uint64 = kblgt2__async_compute__gpu_time__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "GPU");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "GS Threads Dispatched";
    counter->symbol_name = "GsThreads";
    counter->desc = "The total number of geometry shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__async_compute__gs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Geometry Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "HS Threads Dispatched";
    counter->symbol_name = "HsThreads";
    counter->desc = "The total number of hull shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__async_compute__hs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Hull Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU0 Pipe Active";
    counter->symbol_name = "PsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__ps_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "PS FPU1 Pipe Active";
    counter->symbol_name = "PsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__ps_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Pixel Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "FS Threads Dispatched";
    counter->symbol_name = "PsThreads";
    counter->desc = "The total number of fragment shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__async_compute__ps_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Fragment Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU0 Pipe Active";
    counter->symbol_name = "VsFpu0Active";
    counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__vs_fpu0_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS FPU1 Pipe Active";
    counter->symbol_name = "VsFpu1Active";
    counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT;
    counter->read_float = kblgt2__async_compute__vs_fpu1_active__read;
    counter->max_float = percentage_max_callback_float;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    counter = &metric_set->counters[metric_set->n_counters++];
    counter->metric_set = metric_set;
    counter->name = "VS Threads Dispatched";
    counter->symbol_name = "VsThreads";
    counter->desc = "The total number of vertex shader hardware threads dispatched.";
    counter->type = INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT;
    counter->storage = INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64;
    counter->unit = INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS;
    counter->read_uint64 = kblgt2__async_compute__vs_threads__read;
    counter->max_uint64 = NULL /* undefined */;
    intel_perf_add_logical_counter(perf, counter, "EU Array/Vertex Shader");

    assert(metric_set->n_counters <= 21);
}

/*
 * Load all kblgt2 metric sets into @perf.
 *
 * Calls every kblgt2_add_*_metric_set() helper exactly once, in the fixed
 * order listed below, passing the same @perf pointer to each call. @perf is
 * forwarded as-is; this function performs no validation of it and returns
 * nothing.
 *
 * NOTE(review): the helpers are defined elsewhere in this file; judging by
 * their names, each presumably builds one metric set and attaches it to
 * @perf -- confirm against the helper bodies.
 *
 * This file is generated by perf-metricset-codegen.py, so changes to this
 * list belong in the generator rather than here.
 */
void
intel_perf_load_metrics_kblgt2(struct intel_perf *perf)
{
    kblgt2_add_render_basic_metric_set(perf);
    kblgt2_add_compute_basic_metric_set(perf);
    kblgt2_add_render_pipe_profile_metric_set(perf);
    kblgt2_add_memory_reads_metric_set(perf);
    kblgt2_add_memory_writes_metric_set(perf);
    kblgt2_add_compute_extended_metric_set(perf);
    kblgt2_add_compute_l3_cache_metric_set(perf);
    kblgt2_add_hdc_and_sf_metric_set(perf);
    kblgt2_add_l3_1_metric_set(perf);
    kblgt2_add_l3_2_metric_set(perf);
    kblgt2_add_l3_3_metric_set(perf);
    kblgt2_add_rasterizer_and_pixel_backend_metric_set(perf);
    kblgt2_add_sampler_metric_set(perf);
    kblgt2_add_tdl_1_metric_set(perf);
    kblgt2_add_tdl_2_metric_set(perf);
    kblgt2_add_compute_extra_metric_set(perf);
    kblgt2_add_vme_pipe_metric_set(perf);
    kblgt2_add_gpu_busyness_metric_set(perf);
    kblgt2_add_test_oa_metric_set(perf);
    kblgt2_add_pma__stall_metric_set(perf);
    kblgt2_add_async_compute_metric_set(perf);
}

