/*
 * Copyright (C) 2005 Intel Corporation
 *
 * This software and the related documents are Intel copyrighted materials, and your use of them
 * is governed by the express license under which they were provided to you ("License"). Unless
 * the License provides otherwise, you may not use, modify, copy, publish, distribute, disclose
 * or transmit this software or the related documents without Intel's prior written permission.
 *
 * This software and the related documents are provided as is, with no express or implied
 * warranties, other than those that are expressly stated in the License.
*/

#include "lwpmudrv_defines.h"

#include "lwpmudrv_types.h"
#include "rise_errors.h"
#include "lwpmudrv_ecb.h"
#include "lwpmudrv_struct.h"
#if defined(DRV_IA32) || defined(DRV_EM64T)
#include "apic.h"
#endif
#include "lwpmudrv.h"
#include "output.h"
#include "control.h"
#include "pmi.h"
#include "utility.h"
#if defined(DRV_IA32) || defined(DRV_EM64T)
#include "pebs.h"
#endif

#include <sys/cpuset.h>
#include <sys/pmckern.h>

#include <machine/cpu.h>

#ifdef DRV_USE_NMI
#include <sys/bus.h>

#include <machine/intr_machdep.h>
#if __FreeBSD_version >= 1100006
#include <x86/apicvar.h>
#else
#include <machine/apicvar.h>
#endif
#include <machine/frame.h>
#include <machine/segments.h>
#endif
#if defined(DRV_IA32) || defined(DRV_EM64T)
#include "sepdrv_p_state.h"
#endif

// Desc id #0 is used for module records
// Maps an event index to the descriptor id that the handlers in this file use
// to index desc_data[]. As written this is the identity mapping.
#define COMPUTE_DESC_ID(index)     ((index))

// Driver-wide configuration, defined elsewhere; queried here through the
// DRV_CONFIG_* accessors.
extern DRV_CONFIG     drv_cfg;
// Defined elsewhere; when SECURE_SEP is defined the handlers only accept an
// interrupt if the uid obtained via DRV_GET_UID() equals this value.
extern uid_t          uid;

// Bit tested against the saved EFLAGS by the IA32 handler. When it is set the
// handler stores a zeroed code segment descriptor instead of looking one up.
// NOTE(review): 0x00020000 is bit 17, which the name suggests is the
// virtual-8086 (VM) flag -- confirm against the architecture manual.
#define EFLAGS_V86_MASK       0x00020000L

/*********************************************************************
 * Global Variables / State
 *********************************************************************/

/*********************************************************************
 * Interrupt Handler
 *********************************************************************/

/*
 *  PMI_Interrupt_Handler  (DRV_IA32 variant)
 *      Arguments
 *          regs - Pointer to the interrupt frame (saved register state)
 *
 *      Returns
 *          None
 *
 *      Description
 *  Grab the data that is needed to populate the sample records.
 *
 *  The handler freezes the counters of the device that owns the current CPU,
 *  asks that device which events overflowed (check_overflow fills event_mask)
 *  and, while the driver is in DRV_STATE_RUNNING and this CPU accepts
 *  interrupts, reserves one sample record per overflowed event in the per-CPU
 *  buffer and fills it in: descriptor id, TSC, pid/tid, eip/eflags/cs, the
 *  code segment descriptor and the optional PEBS, LBR, power, event count,
 *  perf-metrics, P-state and uncore payloads selected by the configuration.
 *
 *  Before returning it calls PEBS_Reset_Index() when PEBS mode is on,
 *  acknowledges the interrupt with APIC_Ack_Eoi(), swaps the event group when
 *  the CPU's trigger count is zero and restarts the counters.
 *
 *  NOTE(review): this variant uses struct pt_regs, `current` and asmlinkage
 *  and the three-argument form of OUTPUT_Reserve_Buffer_Space(), whereas the
 *  DRV_EM64T variant in this file uses struct trapframe, curproc/curthread
 *  and a four-argument form -- confirm this path is still built and matches
 *  the current helper signatures.
 */
#if defined(DRV_IA32)

asmlinkage VOID
PMI_Interrupt_Handler (
     struct pt_regs *regs
)
{
    SampleRecordPC  *psamp;
    CPU_STATE        pcpu;
    BUFFER_DESC      bd;
    U32              csdlo;        // low  half code seg descriptor
    U32              csdhi;        // high half code seg descriptor
    U32              seg_cs;       // code seg selector
    DRV_MASKS_NODE   event_mask;
    U32              this_cpu;
    U32              dev_idx;
    DISPATCH         dispatch;
    DEV_CONFIG       pcfg;
    U32              i;
    U32              pid;
    U32              tid;
    U64              tsc;
    U32              desc_id;
    EVENT_DESC       evt_desc;
#if defined(SECURE_SEP)
    uid_t            l_uid;
#endif
    U32              accept_interrupt = 1;
    U64              lbr_tos_from_ip = 0;
    U32              sample_size = 0;
    U32              core_sample_size = 0;
    U32              unc_dev_idx;
    DEV_UNC_CONFIG   pcfg_unc         = NULL;
    DISPATCH         dispatch_unc     = NULL;
    U32              read_unc_evt_counts_from_intr = 0;

    SEP_DRV_LOG_INTERRUPT_IN("PID: %d, TID: %d.", current->pid, GET_CURRENT_TGID()); // needs to be before function calls for the tracing to make sense
                                                                                      // may later want to separate the INTERRUPT_IN from the PID/TID logging

    // Resolve the per-CPU state, the per-CPU output buffer and the device
    // (dispatch table + configuration) that owns this CPU.
    this_cpu = CONTROL_THIS_CPU();
    pcpu     = &pcb[this_cpu];
    bd       = &cpu_buf[this_cpu];
    dev_idx  = core_to_dev_map[this_cpu];
    dispatch = LWPMU_DEVICE_dispatch(&devices[dev_idx]);
    pcfg     = LWPMU_DEVICE_pcfg(&devices[dev_idx]);
    // Mark this CPU as being inside the handler; undone just before returning.
    SYS_Locked_Inc(&CPU_STATE_in_interrupt(pcpu));

    // Disable the counter control
    dispatch->freeze(NULL);

#if defined(SECURE_SEP)
    // Only accept interrupts taken while the configured uid is running.
    l_uid            = DRV_GET_UID(current);
    accept_interrupt = (l_uid == uid);
#endif
    // Collect the set of overflowed events into event_mask.
    dispatch->check_overflow(&event_mask);
    if (GET_DRIVER_STATE() == DRV_STATE_RUNNING &&
        CPU_STATE_accept_interrupt(&pcb[this_cpu])) {

        pid = GET_CURRENT_TGID();
        tid = current->pid;

        // When a target pid is configured, drop samples from other processes.
        if (DRV_CONFIG_target_pid(drv_cfg) > 0 && pid != DRV_CONFIG_target_pid(drv_cfg)) {
            accept_interrupt = 0;
        }

        if (accept_interrupt) {
            // One TSC value is shared by every record produced by this interrupt.
            UTILITY_Read_TSC(&tsc);

            for (i = 0; i < event_mask.masks_num; i++) {
                // Events flagged collect_on_ctx_sw are recorded only when the
                // thread on this CPU differs from the last one recorded.
                if (DRV_EVENT_MASK_collect_on_ctx_sw(&event_mask.eventmasks[i])) {
                    if (CPU_STATE_last_thread_id(pcpu) == tid) {
                        continue;
                    }
                    else {
                        CPU_STATE_last_thread_id(pcpu) = tid;
                    }
                }
                // Pick the descriptor describing this record's layout: taken
                // from the mask itself, from the event index, or from the
                // CPU's current group, depending on the configuration.
                if (DRV_CONFIG_mixed_ebc_available(drv_cfg)) {
                    desc_id = DRV_EVENT_MASK_desc_id(&event_mask.eventmasks[i]);
                }
                else {
                    if (DRV_CONFIG_event_based_counts(drv_cfg) == 0) {
                        desc_id  = COMPUTE_DESC_ID(DRV_EVENT_MASK_event_idx(&event_mask.eventmasks[i]));
                    }
                    else {
                        desc_id = CPU_STATE_current_group(pcpu);
                    }
                }

                evt_desc = desc_data[desc_id];
                core_sample_size = EVENT_DESC_sample_size(evt_desc);
                sample_size = core_sample_size;
                psamp = (SampleRecordPC *)OUTPUT_Reserve_Buffer_Space(bd, sample_size, !SEP_IN_NOTIFICATION);

                // No room for this record: skip the event.
                if (!psamp) {
                    continue;
                }
                lbr_tos_from_ip                        = 0;
                CPU_STATE_num_samples(pcpu)           += 1;
                SAMPLE_RECORD_descriptor_id(psamp)     = desc_id;
                SAMPLE_RECORD_tsc(psamp)               = tsc;
                SAMPLE_RECORD_pid_rec_index_raw(psamp) = 1;
                SAMPLE_RECORD_pid_rec_index(psamp)     = pid;
                SAMPLE_RECORD_tid(psamp)               = tid;
                SAMPLE_RECORD_eip(psamp)               = REGS_eip(regs);
                SAMPLE_RECORD_eflags(psamp)            = REGS_eflags(regs);
                SAMPLE_RECORD_cpu_num(psamp)           = (U16) this_cpu;
                SAMPLE_RECORD_cs(psamp)                = (U16) REGS_xcs(regs);
                SAMPLE_RECORD_osid(psamp)              = 0;

                // With EFLAGS_V86_MASK set no descriptor lookup is done and a
                // zeroed descriptor is recorded; otherwise fetch it for cs.
                if (SAMPLE_RECORD_eflags(psamp) & EFLAGS_V86_MASK) {
                    csdlo = 0;
                    csdhi = 0;
                }
                else {
                    seg_cs = SAMPLE_RECORD_cs(psamp);
                    SYS_Get_CSD(seg_cs, &csdlo, &csdhi);
                }
                SAMPLE_RECORD_csd(psamp).u1.lowWord  = csdlo;
                SAMPLE_RECORD_csd(psamp).u2.highWord = csdhi;

                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_pid_rec_index(psamp)  %x.", SAMPLE_RECORD_pid_rec_index(psamp));
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_tid(psamp) %x.", SAMPLE_RECORD_tid(psamp));
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_eip(psamp) %x.", SAMPLE_RECORD_eip(psamp));
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_eflags(psamp) %x.", SAMPLE_RECORD_eflags(psamp));
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_cpu_num(psamp) %x.", SAMPLE_RECORD_cpu_num(psamp));
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_cs(psamp) %x.", SAMPLE_RECORD_cs(psamp));
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_csd(psamp).lowWord %x.", SAMPLE_RECORD_csd(psamp).u1.lowWord);
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_csd(psamp).highWord %x.", SAMPLE_RECORD_csd(psamp).u2.highWord);

                SAMPLE_RECORD_event_index(psamp) = DRV_EVENT_MASK_event_idx(&event_mask.eventmasks[i]);
                // Precise events in PEBS mode: copy the PEBS data when the
                // descriptor has room for it, then let the PEBS helpers adjust
                // the record's IP and TSC.
                if (DEV_CONFIG_pebs_mode(pcfg) && DRV_EVENT_MASK_precise(&event_mask.eventmasks[i]) == 1) {
                    if (EVENT_DESC_pebs_offset(evt_desc) ||
                        EVENT_DESC_latency_offset_in_sample(evt_desc)) {
                        lbr_tos_from_ip = PEBS_Fill_Buffer((S8 *)psamp, evt_desc);
                    }
                    PEBS_Modify_IP((S8 *)psamp, FALSE);
                    PEBS_Modify_TSC((S8 *)psamp);
                }
                // Read the LBRs, storing them in the record only when
                // store_lbrs is set (NULL destination otherwise).
                if (DEV_CONFIG_collect_lbrs(pcfg) &&
                    EVENT_DESC_lbr_offset(evt_desc) &&
                    DRV_EVENT_MASK_lbr_capture(&event_mask.eventmasks[i]) &&
                    !DEV_CONFIG_adaptive_pebs_collect_lbrs(pcfg)) {
                    lbr_tos_from_ip = dispatch->read_lbrs(!DEV_CONFIG_store_lbrs(pcfg) ? NULL:((S8 *)(psamp)+EVENT_DESC_lbr_offset(evt_desc)));
                }
                // For branch events with precise_ip_lbrs, replace the sampled
                // eip with the non-zero value returned by the LBR/PEBS read.
                if (DRV_EVENT_MASK_branch(&event_mask.eventmasks[i]) && DEV_CONFIG_precise_ip_lbrs(pcfg) && lbr_tos_from_ip) {
                    SAMPLE_RECORD_eip(psamp)       = lbr_tos_from_ip;
                    SEP_DRV_LOG_TRACE("UPDATED SAMPLE_RECORD_eip(psamp) %x.", SAMPLE_RECORD_eip(psamp));
                }
                if (DEV_CONFIG_power_capture(pcfg)) {
                    dispatch->read_power(((S8 *)(psamp)+EVENT_DESC_power_offset_in_sample(evt_desc)));
                }
                if (DRV_CONFIG_event_based_counts(drv_cfg) &&
                    DRV_EVENT_MASK_trigger(&event_mask.eventmasks[i])) {
                    dispatch->read_counts((S8 *)psamp, DRV_EVENT_MASK_event_idx(&event_mask.eventmasks[i]));
                }
                if (DEV_CONFIG_enable_perf_metrics(pcfg) && DRV_EVENT_MASK_perf_metrics_capture(&event_mask.eventmasks[i])) {
                    dispatch->read_metrics((S8 *)(psamp)+EVENT_DESC_perfmetrics_offset(evt_desc));
                }

#if (defined(DRV_IA32) || defined(DRV_EM64T))
                if (DRV_CONFIG_enable_p_state(drv_cfg)) {
                    if (DRV_CONFIG_read_pstate_msrs(drv_cfg)) {
                        SEPDRV_P_STATE_Read((S8 *)(psamp)+EVENT_DESC_p_state_offset(evt_desc), pcpu);
                    }
                    if (!DRV_CONFIG_event_based_counts(drv_cfg) && CPU_STATE_p_state_counting(pcpu)) {
                        dispatch->read_counts((S8 *)psamp, DRV_EVENT_MASK_event_idx(&event_mask.eventmasks[i]));
                    }
                }
#endif
                // On trigger events, let every uncore device that has
                // interrupt-driven events append its counts to the record.
                if (DRV_CONFIG_unc_collect_in_intr_enabled(drv_cfg) && DRV_EVENT_MASK_trigger(&event_mask.eventmasks[i])) {
                    for (unc_dev_idx = num_core_devs; unc_dev_idx < num_devices; unc_dev_idx++) {
                        pcfg_unc = (DEV_UNC_CONFIG)LWPMU_DEVICE_pcfg(&devices[unc_dev_idx]);
                        dispatch_unc = LWPMU_DEVICE_dispatch(&devices[unc_dev_idx]);

                        if (pcfg_unc && DEV_UNC_CONFIG_device_with_intr_events(pcfg_unc) &&
                            dispatch_unc && dispatch_unc->trigger_read) {
                            read_unc_evt_counts_from_intr = 1;
                            dispatch_unc->trigger_read((S8 *)(psamp)+EVENT_DESC_uncore_ebc_offset(evt_desc), unc_dev_idx, read_unc_evt_counts_from_intr);
                        }
                    }
                }
            } // for
        }
    }
    if (DEV_CONFIG_pebs_mode(pcfg)) {
        PEBS_Reset_Index(this_cpu);
    }

    APIC_Ack_Eoi();

    // Reset the data counters
    if (CPU_STATE_trigger_count(&pcb[this_cpu]) == 0) {
        dispatch->swap_group(FALSE);
    }
    // Re-enable the counter control
    dispatch->restart(NULL);
    SYS_Locked_Dec(&CPU_STATE_in_interrupt(&pcb[this_cpu]));

    SEP_DRV_LOG_INTERRUPT_OUT("");
    return;
}

#endif  // DRV_IA32


#if defined(DRV_EM64T)

// Selector bit checked by pmi_Get_CSD(): selectors with this bit set are
// treated as LDT selectors and are not resolved.
#define IS_LDT_BIT       0x4
// Number of low selector bits dropped by pmi_Get_CSD() to obtain the index of
// the descriptor within the GDT.
#define SEGMENT_SHIFT    3
// GDT base address and limit consulted by pmi_Get_CSD().
// NOTE(review): nothing in this part of the file writes gdt_desc; presumably
// it is filled in elsewhere during driver setup -- confirm.
IDTGDT_DESC              gdt_desc;

/*
 *  pmi_Get_CSD
 *      Arguments
 *          seg  - code segment selector to look up
 *          low  - out: low  32 bits of the matching descriptor
 *          high - out: high 32 bits of the matching descriptor
 *
 *      Returns
 *          TRUE  - descriptor found in the GDT described by gdt_desc and
 *                  copied to *low / *high
 *          FALSE - selector refers to an LDT or lies beyond the GDT limit;
 *                  *low and *high are both set to 0
 *
 *      Description
 *  Look up the descriptor for a segment selector in the GDT.  The outputs are
 *  always written, so a caller that ignores the return value (as the sample
 *  handler in this file does) never sees stale buffer contents in the
 *  descriptor words.
 */
static U32
pmi_Get_CSD (
    U32     seg,
    U32    *low,
    U32    *high
)
{
    PVOID               gdt_max_addr;
    struct user_segment_descriptor *gdt;
    CodeDescriptor     *csd;

    //
    // These could be pre-init'ed values
    //
    gdt_max_addr = (PVOID) (((U64) gdt_desc.idtgdt_base) + gdt_desc.idtgdt_limit);
    gdt          = gdt_desc.idtgdt_base;

    //
    // end pre-init potential...
    //

    //
    // We don't do ldt's
    //
    if (seg & IS_LDT_BIT) {
        *low  = 0;
        *high = 0;
        return (FALSE);
    }

    //
    // segment offset is based on dropping the bottom 3 bits...
    //
    csd = (CodeDescriptor *) &(gdt[seg >> SEGMENT_SHIFT]);

    //
    // Reject selectors that index past the end of the table.  Zero the
    // outputs here as well so this failure path behaves like the LDT one
    // instead of leaving *low / *high untouched.
    // NOTE(review): only the start address of the entry is compared against
    // base + limit -- confirm that is sufficient for the table layout.
    //
    if (((PVOID) csd) >= gdt_max_addr) {
        SEP_DRV_LOG_WARNING("segment too big in get_CSD(0x%x).", seg);
        *low  = 0;
        *high = 0;
        return FALSE;
    }

    *low  = csd->u1.lowWord;
    *high = csd->u2.highWord;

    SEP_DRV_LOG_TRACE("get_CSD - seg 0x%x, low %08x, high %08x, reserved_0: %d.",
                     seg, *low, *high, csd->u2.s2.reserved_0);

    return TRUE;
}

/*
 *  PMI_Interrupt_Handler  (DRV_EM64T variant)
 *      Arguments
 *          regs - trap frame saved when the interrupt was taken
 *
 *      Returns
 *          None
 *
 *      Description
 *  Thin entry point that forwards to _PMI_Interrupt_Handler().  From
 *  __FreeBSD_version 1200067 on only the trap frame is passed; on older
 *  versions the current CPU id (curcpu) is passed explicitly as well.  The
 *  int returned by _PMI_Interrupt_Handler() is discarded here.
 */
__attribute__((regparm(0))) void
PMI_Interrupt_Handler (struct trapframe *regs)
{
#if __FreeBSD_version >= 1200067
	_PMI_Interrupt_Handler(regs);
#else
	_PMI_Interrupt_Handler(curcpu, regs);
#endif
}

/*
 *  _PMI_Interrupt_Handler  (DRV_EM64T variant)
 *      Arguments
 *          this_cpu - index of the CPU handling the interrupt.  Only a
 *                     parameter before __FreeBSD_version 1200067; from that
 *                     version on it is a local initialised from curcpu.
 *          regs     - trap frame saved when the interrupt was taken
 *
 *      Returns
 *          Always 1 (interrupt handled); see the note before the return.
 *
 *      Description
 *  Freezes the counters of the device that owns this CPU, asks that device
 *  which events overflowed (check_overflow fills event_mask) and, while the
 *  driver is in DRV_STATE_RUNNING and this CPU accepts interrupts, reserves
 *  one sample record per overflowed event in the per-CPU buffer and fills it
 *  in: descriptor id, TSC, pid/tid, cs and its descriptor, the instruction
 *  pointer and flags, plus the optional PEBS, LBR, OS callstack, power, event
 *  count, perf-metrics, P-state and uncore payloads selected by the
 *  configuration.
 *
 *  Afterwards it calls PEBS_Reset_Index() when PEBS mode is on, re-arms or
 *  acknowledges the interrupt, flags that samples need reaping when the
 *  buffer reported "full", swaps the event group when the CPU's trigger
 *  count is zero and restarts the counters.
 */
#if __FreeBSD_version >= 1200067
int
_PMI_Interrupt_Handler (struct trapframe *regs)
{
    int this_cpu = curcpu;
#else
int
_PMI_Interrupt_Handler (int this_cpu, struct trapframe *regs)
{
#endif
    SampleRecordPC  *psamp;
    CPU_STATE        pcpu;
    BUFFER_DESC      bd;
    DRV_MASKS_NODE   event_mask;
    U32              i;
    U32              dev_idx;
    DISPATCH         dispatch;
    DEV_CONFIG       pcfg;
    U32              is_64bit_addr;
    U32              pid;
    U32              tid;
    U64              tsc;
    U32              desc_id;
    EVENT_DESC       evt_desc;
    U32              accept_interrupt = 1;
#if defined(SECURE_SEP)
    uid_t            l_uid;
#endif
    // signal_full is an out-parameter of OUTPUT_Reserve_Buffer_Space() that
    // is tested right after the call; give it a defined value up front so the
    // test never reads an indeterminate value.
    boolean_t        signal_full = FALSE, reap_samples = FALSE;
    U64              lbr_tos_from_ip = 0;
    U32              sample_size = 0;
    U32              core_sample_size = 0;
    U32              unc_dev_idx;
    DEV_UNC_CONFIG   pcfg_unc         = NULL;
    DISPATCH         dispatch_unc     = NULL;
    U32              read_unc_evt_counts_from_intr = 0;

    SEP_DRV_LOG_INTERRUPT_IN("PID: %d, TID: %d.", curproc->p_pid, curthread->td_tid); // needs to be before function calls for the tracing to make sense
                                                                                      // may later want to separate the INTERRUPT_IN from the PID/TID logging

    // Resolve the per-CPU state, the per-CPU output buffer and the device
    // (dispatch table + configuration) that owns this CPU.
    pcpu     = &pcb[this_cpu];
    bd       = &cpu_buf[this_cpu];
    dev_idx  = core_to_dev_map[this_cpu];
    dispatch = LWPMU_DEVICE_dispatch(&devices[dev_idx]);
    pcfg     = LWPMU_DEVICE_pcfg(&devices[dev_idx]);
    // Disable the counter control
    dispatch->freeze(NULL);
    // Mark this CPU as being inside the handler; undone just before returning.
    SYS_Locked_Inc(&CPU_STATE_in_interrupt(pcpu));

#if defined(SECURE_SEP)
    // Only accept interrupts taken while the configured uid is running.
    l_uid            = DRV_GET_UID(current);
    accept_interrupt = (l_uid == uid);
#endif
    // Collect the set of overflowed events into event_mask.
    dispatch->check_overflow(&event_mask);
    if (GET_DRIVER_STATE() == DRV_STATE_RUNNING &&
        CPU_STATE_accept_interrupt(&pcb[this_cpu])) {

        pid = curproc->p_pid;
        tid = curthread->td_tid;

        // When a target pid is configured, drop samples from other processes.
        if (DRV_CONFIG_target_pid(drv_cfg) > 0 && pid != DRV_CONFIG_target_pid(drv_cfg)) {
            accept_interrupt = 0;
        }
        if (accept_interrupt) {
            // One TSC value is shared by every record produced by this interrupt.
            UTILITY_Read_TSC(&tsc);

            for (i = 0; i < event_mask.masks_num; i++) {

                // Pick the descriptor describing this record's layout: taken
                // from the mask itself, from the event index, or from the
                // CPU's current group, depending on the configuration.
                if (DRV_CONFIG_mixed_ebc_available(drv_cfg)) {
                    desc_id = DRV_EVENT_MASK_desc_id(&event_mask.eventmasks[i]);
                }
                else {
                    if (DRV_CONFIG_event_based_counts(drv_cfg) == 0) {
                        desc_id  = COMPUTE_DESC_ID(DRV_EVENT_MASK_event_idx(&event_mask.eventmasks[i]));
                    }
                    else {
                        desc_id = CPU_STATE_current_group(pcpu);
                    }
                }
                evt_desc = desc_data[desc_id];

                core_sample_size = EVENT_DESC_sample_size(evt_desc);
                sample_size = core_sample_size;
                psamp = (SampleRecordPC *)OUTPUT_Reserve_Buffer_Space(bd,
                                                                      sample_size,
                                                                      &signal_full,
                                                                      !SEP_IN_NOTIFICATION);
                if (signal_full && !CPU_STATE_deferred_reap_samples(pcpu)) {
                    /*
                     * Since we may currently be in NMI context (depending
                     *  on whether DRV_USE_NMI is set) we cannot issue a
                     *  cv_signal here because it will try to acquire a
                     *  spinlock which isn't allowed during NMI.  Instead
                     *  set this reap_samples flag, which at the end of this
                     *  function will indicate to the kernel that we need
                     *  a callback during non-NMI context to signal the
                     *  sample collector thread.
                     */
                    reap_samples = TRUE;
                }

                // No room for this record: skip the event.
                if (!psamp) {
                    continue;
                }
                lbr_tos_from_ip                        = 0;
                CPU_STATE_num_samples(pcpu)           += 1;
                SAMPLE_RECORD_descriptor_id(psamp)     = desc_id;
                SAMPLE_RECORD_tsc(psamp)               = tsc;
                SAMPLE_RECORD_pid_rec_index_raw(psamp) = 1;
                SAMPLE_RECORD_pid_rec_index(psamp)     = pid;
                SAMPLE_RECORD_tid(psamp)               = tid;
                SAMPLE_RECORD_cpu_num(psamp)           = (U16) this_cpu;
                SAMPLE_RECORD_cs(psamp)                = (U16) REGS_cs(regs);
                SAMPLE_RECORD_osid(psamp)              = 0;

                // The descriptor words decide below whether the sample is
                // recorded in the 64-bit or the 32-bit layout, so make sure
                // they hold defined values even when the lookup fails.
                if (!pmi_Get_CSD(SAMPLE_RECORD_cs(psamp),
                                 &SAMPLE_RECORD_csd(psamp).u1.lowWord,
                                 &SAMPLE_RECORD_csd(psamp).u2.highWord)) {
                    SAMPLE_RECORD_csd(psamp).u1.lowWord  = 0;
                    SAMPLE_RECORD_csd(psamp).u2.highWord = 0;
                }

                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_pid_rec_index(psamp)  %x.", SAMPLE_RECORD_pid_rec_index(psamp));
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_tid(psamp) %x.", SAMPLE_RECORD_tid(psamp));
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_cpu_num(psamp) %x.", SAMPLE_RECORD_cpu_num(psamp));
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_cs(psamp) %x.", SAMPLE_RECORD_cs(psamp));
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_csd(psamp).lowWord %x.", SAMPLE_RECORD_csd(psamp).u1.lowWord);
                SEP_DRV_LOG_TRACE("SAMPLE_RECORD_csd(psamp).highWord %x.", SAMPLE_RECORD_csd(psamp).u2.highWord);

                // The reserved_0 bit of the descriptor selects the layout:
                // iip/ipsr when set, eip/eflags otherwise.
                is_64bit_addr = (SAMPLE_RECORD_csd(psamp).u2.s2.reserved_0 == 1);
                if (is_64bit_addr) {
                    SAMPLE_RECORD_iip(psamp)           = REGS_rip(regs);
                    // Low 32 bits: flags; the descriptor's dpl goes above bit 31.
                    SAMPLE_RECORD_ipsr(psamp)          = (REGS_eflags(regs) & 0xffffffff) |
                        (((U64) SAMPLE_RECORD_csd(psamp).u2.s2.dpl) << 32);
                    SAMPLE_RECORD_ia64_pc(psamp)       = TRUE;
                }
                else {
                    SAMPLE_RECORD_eip(psamp)           = REGS_rip(regs);
                    SAMPLE_RECORD_eflags(psamp)        = REGS_eflags(regs);
                    SAMPLE_RECORD_ia64_pc(psamp)       = FALSE;

                    SEP_DRV_LOG_TRACE("SAMPLE_RECORD_eip(psamp) %x.", SAMPLE_RECORD_eip(psamp));
                    SEP_DRV_LOG_TRACE("SAMPLE_RECORD_eflags(psamp) %x.", SAMPLE_RECORD_eflags(psamp));
                }

                SAMPLE_RECORD_event_index(psamp) = DRV_EVENT_MASK_event_idx(&event_mask.eventmasks[i]);
                // Precise events in PEBS mode: copy the PEBS data when the
                // descriptor has room for it, then let the PEBS helpers adjust
                // the record's IP and TSC.
                if (DEV_CONFIG_pebs_mode(pcfg)) {
                    if (DRV_EVENT_MASK_precise(&event_mask.eventmasks[i])) {
                        if (EVENT_DESC_pebs_offset(evt_desc) ||
                            EVENT_DESC_latency_offset_in_sample(evt_desc)) {
                            lbr_tos_from_ip = PEBS_Fill_Buffer((S8 *)psamp, evt_desc);
                        }
                        PEBS_Modify_IP((S8 *)psamp, is_64bit_addr);
                        PEBS_Modify_TSC((S8 *)psamp);
                    }
                }
                // Read the LBRs, storing them in the record only when
                // store_lbrs is set (NULL destination otherwise).
                if (DEV_CONFIG_collect_lbrs(pcfg) &&
                    EVENT_DESC_lbr_offset(evt_desc) &&
                    DRV_EVENT_MASK_lbr_capture(&event_mask.eventmasks[i]) &&
                    !DEV_CONFIG_adaptive_pebs_collect_lbrs(pcfg)) {
                    lbr_tos_from_ip = dispatch->read_lbrs(!DEV_CONFIG_store_lbrs(pcfg) ? NULL:((S8 *)(psamp)+EVENT_DESC_lbr_offset(evt_desc)));
                }
                // For branch events with precise_ip_lbrs, replace the sampled
                // IP with the non-zero value returned by the LBR/PEBS read.
                if (DRV_EVENT_MASK_branch(&event_mask.eventmasks[i]) && DEV_CONFIG_precise_ip_lbrs(pcfg) && lbr_tos_from_ip) {
                    if (is_64bit_addr) {
                        SAMPLE_RECORD_iip(psamp)       = lbr_tos_from_ip;
                        SEP_DRV_LOG_TRACE("UPDATED SAMPLE_RECORD_iip(psamp) 0x%llx.", SAMPLE_RECORD_iip(psamp));
                    }
                    else {
                        SAMPLE_RECORD_eip(psamp)       = lbr_tos_from_ip;
                        SEP_DRV_LOG_TRACE("UPDATED SAMPLE_RECORD_eip(psamp) %x.", SAMPLE_RECORD_eip(psamp));
                    }
                }
                if (DEV_CONFIG_collect_os_callstacks(pcfg)) {
                    uintptr_t *cc = (uintptr_t *)((U8 *)(psamp)+EVENT_DESC_callstack_offset(evt_desc));
                    U32 ccsize = EVENT_DESC_callstack_size(evt_desc);
                    // Dedicated index so the event loop's `i` is not shadowed.
                    U32 cc_idx;

                    /* First, clear the callchain address array. */
                    for (cc_idx = 0; cc_idx < ccsize; ++cc_idx) {
                        cc[cc_idx] = 0;
                    }

                    /*
                     * If we're sampling a usermode address, we need to
                     *  notify the raw NMI vector code to call us back
                     *  after exiting NMI context to capture the user
                     *  callchain.  Also make sure we defer setting the
                     *  pmc_cpumask by moving a pending reap_samples request
                     *  into the per-CPU deferred_reap_samples flag.  We want
                     *  to wait until after the user callchain has been saved
                     *  before the samples are reaped by the sep user thread.
                     */
                    if (TRAPF_USERMODE(regs)) {
                        if ((curthread->td_pflags & TDP_CALLCHAIN) == 0) {
                            curthread->td_pflags |= TDP_CALLCHAIN;
                            sched_pin();
                        }

                        // Park the callchain destination in the first unused
                        // per-CPU slot (size 0 marks a slot as unused).
                        for (cc_idx = 0; cc_idx < MAX_CC_PER_INTERRUPT; ++cc_idx) {
                            if (CPU_STATE_user_callchain_size(pcpu, cc_idx) == 0) {
                                CPU_STATE_user_callchain_size(pcpu, cc_idx) = ccsize;
                                CPU_STATE_user_callchain_buffer(pcpu, cc_idx) = cc;
                                break;
                            }
                        }

                        if (reap_samples) {
                            CPU_STATE_deferred_reap_samples(pcpu) = 1;
                            reap_samples = FALSE;
                        }
                    } else {
                        /*
                         * We are sampling a kernel address, so we can capture
                         *  kernel callchain immediately.
                         */
                        sep_save_kernel_callchain(cc, ccsize, regs);
                    }
                }
                if (DEV_CONFIG_power_capture(pcfg)) {
                    dispatch->read_power(((S8 *)(psamp)+EVENT_DESC_power_offset_in_sample(evt_desc)));
                }
                if (DRV_CONFIG_event_based_counts(drv_cfg) &&
                    DRV_EVENT_MASK_trigger(&event_mask.eventmasks[i])) {
                    dispatch->read_counts((S8 *)psamp, DRV_EVENT_MASK_event_idx(&event_mask.eventmasks[i]));
                }
                if (DEV_CONFIG_enable_perf_metrics(pcfg) && DRV_EVENT_MASK_perf_metrics_capture(&event_mask.eventmasks[i])) {
                    dispatch->read_metrics((S8 *)(psamp)+EVENT_DESC_perfmetrics_offset(evt_desc));
                }
#if (defined(DRV_IA32) || defined(DRV_EM64T))
                if (DRV_CONFIG_enable_p_state(drv_cfg)) {
                    SEPDRV_P_STATE_Read((S8 *)(psamp)+EVENT_DESC_p_state_offset(evt_desc), pcpu);
                    if (!DRV_CONFIG_event_based_counts(drv_cfg) && CPU_STATE_p_state_counting(pcpu)) {
                        dispatch->read_counts((S8 *)psamp, DRV_EVENT_MASK_event_idx(&event_mask.eventmasks[i]));
                    }
                }
#endif
                // On trigger events, let every uncore device that has
                // interrupt-driven events append its counts to the record.
                if (DRV_CONFIG_unc_collect_in_intr_enabled(drv_cfg) && DRV_EVENT_MASK_trigger(&event_mask.eventmasks[i])) {
                    for (unc_dev_idx = num_core_devs; unc_dev_idx < num_devices; unc_dev_idx++) {
                        pcfg_unc = (DEV_UNC_CONFIG)LWPMU_DEVICE_pcfg(&devices[unc_dev_idx]);
                        dispatch_unc = LWPMU_DEVICE_dispatch(&devices[unc_dev_idx]);

                        if (pcfg_unc && DEV_UNC_CONFIG_device_with_intr_events(pcfg_unc) &&
                            dispatch_unc && dispatch_unc->trigger_read) {
                            read_unc_evt_counts_from_intr = 1;
                            dispatch_unc->trigger_read((S8 *)(psamp)+EVENT_DESC_uncore_ebc_offset(evt_desc), unc_dev_idx, read_unc_evt_counts_from_intr);
                        }
                    }
                }
            }
        }
    }
    if (DEV_CONFIG_pebs_mode(pcfg)) {
        PEBS_Reset_Index(this_cpu);
    }

#ifdef DRV_USE_NMI
    lapic_reenable_pmc();
#else
    APIC_Ack_Eoi();
#endif

    // Ask the kernel for the non-NMI callback described above; the mechanism
    // depends on the FreeBSD version.
    if (reap_samples) {
#if __FreeBSD_version >= 1200067
        DPCPU_SET(pmc_sampled, 1);
#elif __FreeBSD_version >= 900040
        CPU_SET_ATOMIC(this_cpu, &pmc_cpumask);
#else
        // Unsigned constant: avoids shifting into the sign bit of an int.
        atomic_set_rel_int(&pmc_cpumask, (1U << this_cpu));
#endif
    }

    // Reset the data counters
    if (CPU_STATE_trigger_count(&pcb[this_cpu]) == 0) {
        dispatch->swap_group(FALSE);
    }
    // Re-enable the counter control
    dispatch->restart(NULL);
    SYS_Locked_Dec(&CPU_STATE_in_interrupt(&pcb[this_cpu]));

    // Always return 1, indicating we handled the interrupt.
    // Checking event_mask.masks_num works mostly, but there are corner cases
    //  where precise and non-precise events overflow simultaneously,
    //  resulting in two separate NMIs.  The second NMI will result in no
    //  events handled.
    SEP_DRV_LOG_INTERRUPT_OUT("");
    return (1);
}

#endif
