/* -*- linux-c -*-
* time-estimation with minimal dependency on xtime
* Copyright (C) 2006 Intel Corporation.
* Copyright (C) 2010-2019 Red Hat Inc.
*
* This file is part of systemtap, and is free software. You can
* redistribute it and/or modify it under the terms of the GNU General
* Public License (GPL); either version 2, or (at your option) any
* later version.
*/
#if defined(STAPCONF_KTIME_GET_REAL_FAST_NS)
/* Prefer this simple 4.15+ mechanism for timekeeping. */
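/* ktime_get_real_fast_ns() is documented as NMI-safe and keeps no
 * per-cpu state of its own, which is why the init/kill hooks below
 * reduce to no-ops. */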
static int
_stp_init_time(void) {
return 0;
}
static void
_stp_kill_time(void) {
}
static int64_t _stp_gettimeofday_ns(void) {
return ktime_get_real_fast_ns();
}
#else /* rest of file - do it the hard way */
#if defined (__i386__) || defined (__x86_64__)
#include <asm/cpufeature.h>
#endif
#if defined (STAPCONF_TSC_KHZ) && \
!(defined (__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21))
// x86_64 had no separate <asm/tsc.h> to include before 2.6.21. Otherwise:
#include <asm/tsc.h>
#endif
#ifdef STAPCONF_KTIME_GET_REAL
#include <linux/ktime.h>
#endif
#include <linux/cpufreq.h>
/* The interval at which the __stp_time_timer_callback routine runs,
which resynchronizes our per-cpu base_ns/base_cycles values. A
lower rate (higher interval) is sufficient if we get cpufreq-change
notifications. */
#ifndef STP_TIME_SYNC_INTERVAL_NOCPUFREQ
#define STP_TIME_SYNC_INTERVAL_NOCPUFREQ ((HZ+9)/10) /* ten times per second */
#endif
#ifndef STP_TIME_SYNC_INTERVAL_CPUFREQ
#define STP_TIME_SYNC_INTERVAL_CPUFREQ (HZ*10) /* once per ten seconds */
#endif
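/* For example, with HZ == 250: (HZ+9)/10 == 25 jiffies (~100 ms
 * between resyncs), and HZ*10 == 2500 jiffies (10 s). */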
static int __stp_cpufreq_notifier_registered = 0;
#ifndef STP_TIME_SYNC_INTERVAL
#define STP_TIME_SYNC_INTERVAL (__stp_cpufreq_notifier_registered ? \
STP_TIME_SYNC_INTERVAL_CPUFREQ : \
STP_TIME_SYNC_INTERVAL_NOCPUFREQ)
#endif
#ifndef NSEC_PER_MSEC
#define NSEC_PER_MSEC 1000000L
#endif
typedef struct __stp_time_t {
/*
* A write lock is taken by __stp_time_timer_callback() and
* __stp_time_cpufreq_callback(). Neither writer is in interrupt context,
* and both disable interrupts before taking the lock, so there should be
* no opportunity for deadlock.
*
* A read lock is taken by _stp_gettimeofday_ns(). There is the potential
* for this to occur at any time, so there is a slim chance that this will
* happen while the write lock is held, and it will be impossible to get a
* read lock. However, we can limit how long we try to get the lock to
* avoid a deadlock.
*
* Note that seqlock is chosen so that readers don't block writers. It's
* also important that readers can attempt a lock from _any_ context (e.g.,
* NMI), and some kernels don't have read_trylock.
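*
* See _stp_gettimeofday_ns() below for the bounded-retry read side
* (up to STP_TIMELOCKTRIES attempts, STP_TIMELOCKDELAY ns apart).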
*/
seqlock_t lock;
/* These provide a reference time to correlate cycles to real time */
int64_t base_ns;
cycles_t base_cycles;
/* The frequency in kHz of this CPU's time stamp counter, for interpolating
* cycle counts from the base time. */
unsigned int freq;
/* Callback used to schedule updates of the base time */
struct timer_list timer;
/* Flag to indicate that a timer is indeed added. We only add timers
* to online CPUs at startup but online CPUs may change at any time
* through the CPU hotplug feature of the kernel. */
bool is_valid;
} stp_time_t;
static void *stp_time = NULL;
/* Try to estimate the number of CPU cycles in a millisecond - i.e. kHz. This
* relies heavily on the accuracy of udelay. By calling udelay twice, we
* attempt to account for overhead in the call.
*
* NB: interrupts should be disabled when calling this.
*
* FIXME: This is not very accurate on Xen kernels!
*/
static unsigned int
__stp_get_freq(void)
{
// If we can get the actual frequency of HW counter, we use it.
#if defined (__ia64__)
return local_cpu_data->itc_freq / 1000;
#elif defined (__s390__) || defined (__s390x__) || defined (__arm__)
// We don't need to find the cpu freq on s390, as the TOD clock
// always runs at a fixed frequency (see: Principles of Operation,
// p. 4-36). On arm, get_cycles() returns 0, so no frequency applies.
return 0;
#elif (defined (__i386__) || defined (__x86_64__)) && defined(STAPCONF_TSC_KHZ)
return tsc_khz;
#elif (defined (__i386__) || defined (__x86_64__)) && defined(STAPCONF_CPU_KHZ)
return cpu_khz;
#else /* fallback: estimate the frequency via udelay() */
// If we don't know the actual frequency, we estimate it.
cycles_t beg, mid, end;
beg = get_cycles(); barrier();
udelay(1); barrier();
mid = get_cycles(); barrier();
udelay(1001); barrier();
end = get_cycles(); barrier();
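// (mid - beg) measures ~1 us of cycles plus the fixed call overhead;
// (end - mid) measures ~1001 us plus the same overhead. Subtracting
// cancels the overhead and leaves the cycle count for 1000 us, i.e.
// cycles per millisecond == kHz:
//   (end - mid) - (mid - beg) == beg - 2*mid + end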
return (beg - 2*mid + end);
#endif
}
static void
__stp_ktime_get_real_ts(struct timespec *ts)
{
#ifdef STAPCONF_KTIME_GET_REAL
ktime_get_real_ts(ts);
#else /* STAPCONF_KTIME_GET_REAL */
struct timeval tv;
do_gettimeofday(&tv);
ts->tv_sec = tv.tv_sec;
ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC;
#endif
}
/* Update this cpu's base_ns/base_cycles values. May be called from
initialization or various other callback mechanisms. */
static stp_time_t*
__stp_time_local_update(void)
{
unsigned long flags;
stp_time_t *time;
struct timespec ts;
int64_t ns;
cycles_t cycles;
// PR20600: We used to disable interrupts here with
// local_irq_save(), then call write_seqlock_irqsave() further
// down when we write to 'time'. However, this caused intermittent
// hangs. Only disabling interrupts once here seems to fix the
// problem, at the cost of holding time->lock a bit longer than we
// originally did.
time = per_cpu_ptr(stp_time, smp_processor_id());
if (unlikely(!time->is_valid)) {
return time;
}
write_seqlock_irqsave(&time->lock, flags);
__stp_ktime_get_real_ts(&ts);
cycles = get_cycles();
ns = (NSEC_PER_SEC * (int64_t)ts.tv_sec) + ts.tv_nsec;
time->base_ns = ns;
time->base_cycles = cycles;
write_sequnlock_irqrestore(&time->lock, flags);
return time;
}
/* The cross-smp call. */
static void
__stp_time_smp_callback(void *val)
{
(void) val;
(void) __stp_time_local_update();
}
/* The timer callback is in a softIRQ -- interrupts enabled. */
static void
__stp_time_timer_callback(stp_timer_callback_parameter_t unused)
{
stp_time_t *time = __stp_time_local_update();
if (unlikely(!time->is_valid))
return;
(void) unused;
/* PR6481: make sure IRQs are enabled before resetting the timer
(IRQs are disabled and then reenabled in
__stp_time_local_update()).
XXX: The worst that can probably happen is that we get
two consecutive timer resets. */
if (likely(atomic_read(session_state()) != STAP_SESSION_STOPPED))
mod_timer(&time->timer, jiffies + STP_TIME_SYNC_INTERVAL);
#ifdef DEBUG_TIME
_stp_warn("cpu%d %p khz=%d base=%lld cycles=%lld\n", smp_processor_id(), (void*)time, time->freq,
(long long)time->base_ns, (long long)time->base_cycles);
#endif
}
/* This is called as an IPI, with interrupts disabled. */
static void
__stp_init_time(void *info)
{
struct timespec ts;
// XXX: The following code replaces the call to __stp_time_local_update():
#if 1
int64_t ns;
cycles_t cycles;
#endif
stp_time_t *time = per_cpu_ptr(stp_time, smp_processor_id());
#if 1
// Note -- this is the same calculation as in __stp_time_local_update:
__stp_ktime_get_real_ts(&ts);
cycles = get_cycles();
ns = (NSEC_PER_SEC * (int64_t)ts.tv_sec) + ts.tv_nsec;
time->base_ns = ns;
time->base_cycles = cycles;
#endif
seqlock_init(&time->lock);
time->freq = __stp_get_freq();
// XXX: The following is definitely not legal on kernel-rt,
// since seqlock cannot be used when interrupts are disabled.
//
// In addition, this is also not recommended to do in a function
// being called via on_each_cpu() / smp_call_function()
// -- see comment in Linux kernel/smp.c
//__stp_time_local_update();
timer_setup(&time->timer, __stp_time_timer_callback, 0);
time->timer.expires = jiffies + STP_TIME_SYNC_INTERVAL;
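/* Without add_timer_on(), arm the timer right here on whichever CPU
 * this IPI runs on; otherwise _stp_init_time() pins each timer to its
 * CPU with add_timer_on() after the cross-calls complete. */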
#ifndef STAPCONF_ADD_TIMER_ON
add_timer(&time->timer);
time->is_valid = true;
#endif
}
#ifdef CONFIG_CPU_FREQ
/* The cpufreq callback is not in interrupt context -- interrupts enabled */
static int
__stp_time_cpufreq_callback(struct notifier_block *self,
unsigned long state, void *vfreqs)
{
unsigned long flags;
struct cpufreq_freqs *freqs;
int freq_khz;
stp_time_t *time;
struct timespec ts;
int64_t ns;
cycles_t cycles;
int reset_timer_p = 0;
switch (state) {
case CPUFREQ_POSTCHANGE:
#ifdef CPUFREQ_RESUMECHANGE
case CPUFREQ_RESUMECHANGE:
#endif
freqs = (struct cpufreq_freqs *)vfreqs;
freq_khz = freqs->new;
time = per_cpu_ptr(stp_time, freqs->cpu);
if (unlikely(!time->is_valid))
break;
write_seqlock_irqsave(&time->lock, flags);
if (time->freq != freq_khz) {
time->freq = freq_khz;
// NB: freqs->cpu may not equal smp_processor_id(),
// so we can't update the subject processor's
// base_ns/base_cycles values just now.
reset_timer_p = 1;
}
write_sequnlock_irqrestore(&time->lock, flags);
if (reset_timer_p) {
#ifdef DEBUG_TIME
_stp_warn ("cpu%d %p freq->%d\n", freqs->cpu, (void*)time, freqs->new);
#endif
#if defined(STAPCONF_SMPCALL_5ARGS) || defined(STAPCONF_SMPCALL_4ARGS)
(void) smp_call_function_single (freqs->cpu, &__stp_time_smp_callback, 0,
#ifdef STAPCONF_SMPCALL_5ARGS
1, /* nonatomic */
#endif
0); /* not wait */
#else
/* RHEL4ish: cannot direct to a single cpu ... so broadcast to them all */
(void) smp_call_function (&__stp_time_smp_callback, NULL, 0, 0);
#endif
}
break;
}
return NOTIFY_OK;
}
static struct notifier_block __stp_time_notifier = {
.notifier_call = __stp_time_cpufreq_callback,
};
static int
__stp_constant_freq(void)
{
#ifdef STAPCONF_CONSTANT_TSC
// If the CPU has constant tsc, we don't need to use cpufreq.
return boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
#elif defined (__ia64__) || defined (__s390__) || defined (__s390x__) || defined (__arm__)
// These architectures have a constant-rate time counter.
return 1;
#else
return 0;
#endif
}
#endif /* CONFIG_CPU_FREQ */
/* This function is called during module unloading. */
static void
_stp_kill_time(void)
{
if (stp_time) {
int cpu;
/* NB: We cannot use for_each_online_cpu() here since the set of
 * online CPUs may change at any time through the kernel's CPU
 * hotplug feature. Timers were only added on the CPUs that were
 * online at startup, so rely on each entry's is_valid flag instead.
 */
for_each_possible_cpu(cpu) {
stp_time_t *time = per_cpu_ptr(stp_time, cpu);
if (likely(time->is_valid)) {
del_timer_sync(&time->timer);
}
}
#ifdef CONFIG_CPU_FREQ
if (!__stp_constant_freq() && __stp_cpufreq_notifier_registered) {
cpufreq_unregister_notifier(&__stp_time_notifier,
CPUFREQ_TRANSITION_NOTIFIER);
}
#endif
_stp_free_percpu(stp_time);
stp_time = NULL;
}
}
/* This function is called during module loading. */
static int
_stp_init_time(void)
{
int cpu, ret = 0;
might_sleep();
_stp_kill_time();
stp_time = _stp_alloc_percpu(sizeof(stp_time_t));
if (unlikely(stp_time == NULL))
return -ENOMEM;
#ifdef STAPCONF_ONEACHCPU_RETRY
ret = on_each_cpu(__stp_init_time, NULL, 0, 1);
#else
ret = on_each_cpu(__stp_init_time, NULL, 1);
#endif
#ifdef STAPCONF_ADD_TIMER_ON
for_each_online_cpu(cpu) {
stp_time_t *time = per_cpu_ptr(stp_time, cpu);
add_timer_on(&time->timer, cpu);
time->is_valid = true;
}
#endif
#ifdef CONFIG_CPU_FREQ
if (!ret && !__stp_constant_freq()) {
if (!cpufreq_register_notifier(&__stp_time_notifier,
CPUFREQ_TRANSITION_NOTIFIER)) {
__stp_cpufreq_notifier_registered = 1;
for_each_online_cpu(cpu) {
unsigned long flags;
int freq_khz = cpufreq_get(cpu); // may block
if (freq_khz > 0) {
stp_time_t *time = per_cpu_ptr(stp_time, cpu);
if (likely(time->is_valid)) {
write_seqlock_irqsave(&time->lock, flags);
time->freq = freq_khz;
write_sequnlock_irqrestore(&time->lock, flags);
}
}
}
}
}
#endif
if (ret)
_stp_kill_time();
return ret;
}
#ifndef STP_TIMELOCKDELAY
#define STP_TIMELOCKDELAY 100 /* ns */
#endif
#ifndef STP_TIMELOCKTRIES
#define STP_TIMELOCKTRIES 10 /* total 1 us */
#endif
static int64_t
_stp_gettimeofday_ns(void)
{
int64_t base;
cycles_t last;
uint64_t delta;
unsigned int freq;
unsigned int seq;
stp_time_t *time;
int i = 0;
if (!stp_time)
return -1;
time = per_cpu_ptr(stp_time, smp_processor_id());
if (unlikely(!time->is_valid)) {
return -1;
}
preempt_disable(); /* XXX: why? Isn't this only run from probe handlers? */
seq = read_seqbegin(&time->lock);
base = time->base_ns;
last = time->base_cycles;
freq = time->freq;
while (unlikely(read_seqretry(&time->lock, seq))) {
if (/* very */ unlikely(++i >= STP_TIMELOCKTRIES)) {
preempt_enable_no_resched();
_stp_warn ("_stp_gettimofday_ns seqlock timeout; see STP_TIMELOCK*");
return 0;
}
ndelay(STP_TIMELOCKDELAY);
seq = read_seqbegin(&time->lock);
base = time->base_ns;
last = time->base_cycles;
freq = time->freq;
}
delta = get_cycles() - last;
preempt_enable_no_resched();
#if defined (__s390__) || defined (__s390x__)
// The TOD clock on s390 (read by get_cycles()) is converted
// to a nanosecond value using:
//   (get_cycles() * 125) >> 7;
delta = (delta * 125) >> 7;
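// i.e. delta * 125 / 128: one get_cycles() unit on s390 is 4 TOD
// units of 1/4096 us each, or 125/128 ns.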
#elif defined (__arm__)
/* arm always returns 0 for get_cycles(), so this is just a fake
value until we get a real fix. */
delta = 1000;
#else /* __s390__ || __s390x__ */
// Verify units:
// (D cycles) * (1E6 ns/ms) / (F cycles/ms [kHz]) = ns
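// e.g. 3000 cycles at freq == 3000000 kHz (a 3 GHz counter):
//   3000 * 1000000 / 3000000 = 1000 ns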
delta *= NSEC_PER_MSEC;
if (freq == 0)
return 0;
do_div(delta, freq);
#endif
return base + delta;
}
#endif /* ! defined(STAPCONF_KTIME_GET_REAL_FAST_NS) */