Sindbad~EG File Manager

Current Path : /proc/2715199/root/usr/share/systemtap/runtime/transport/
Current File : //proc/2715199/root/usr/share/systemtap/runtime/transport/control.c
/* -*- linux-c -*-
 *
 * control channel
 * Copyright (C) 2007-2018 Red Hat Inc.
 *
 * This file is part of systemtap, and is free software.  You can
 * redistribute it and/or modify it under the terms of the GNU General
 * Public License (GPL); either version 2, or (at your option) any
 * later version.
 */

#include "control.h"
#include "../mempool.c"
#include "symbols.c"
#include <linux/delay.h>
#include <linux/poll.h>
#include "../uidgid_compatibility.h"

/* Pool from which the control-channel _stp_buffer messages are allocated. */
static _stp_mempool_t *_stp_pool_q;
/* Queue of filled-in messages waiting to be read from the control file. */
static struct list_head _stp_ctl_ready_q;
/* Taken around accesses to _stp_ctl_ready_q by the send, read and poll paths. */
static STP_DEFINE_SPINLOCK(_stp_ctl_ready_lock);
/* Guards the "type" (in-use marker) field of the pre-allocated special buffers. */
static STP_DEFINE_SPINLOCK(_stp_ctl_special_msg_lock);

/* Forward declarations of the handlers invoked from _stp_ctl_write_cmd(). */
static void _stp_cleanup_and_exit(int send_exit);
static void _stp_handle_tzinfo (struct _stp_msg_tzinfo* tzi);
static void _stp_handle_privilege_credentials (struct _stp_msg_privilege_credentials* pc);
static void _stp_handle_remote_id (struct _stp_msg_remote_id* rem);
static void _stp_handle_namespaces_pid (struct _stp_msg_ns_pid *nspid);


/* Write handler for the control file: parses ONE message written by
   user space and dispatches it to the matching handler.

   Message layout: a u32 message type followed by a type-specific
   payload.  The writer's pid is recorded in _stp_pid on every call.

   Return value:
     - count (the whole write) when the message was handled;
     - 0 when the write is shorter than the u32 header or than the
       payload struct the message type requires;
     - -EFAULT when reading from the user buffer fails;
     - -EPERM when a root-only message (STP_RELOCATION, STP_TZINFO,
       STP_PRIVILEGE_CREDENTIALS, STP_REMOTE_ID) arrives with euid != 0;
     - -EINVAL for an unknown type, or for STP_BULK when STP_BULKMODE
       is not defined.

   Message handling is serialized by the function-local cmd_mutex, which
   is what makes the static per-case message structs usable.  */
static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
        static DEFINE_MUTEX(cmd_mutex);
	u32 type;
        int rc = 0;

	/* Effective uid of the writer; how it is obtained depends on the
	   kernel configuration detected at build time. */
#ifdef STAPCONF_TASK_UID
	uid_t euid = current->euid;
#else
#if defined(CONFIG_USER_NS) || (LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0))
	uid_t euid = from_kuid_munged(current_user_ns(), current_euid());
#else
	uid_t euid = current_euid();
#endif
#endif

	_stp_pid = current->pid;

	/* Not even a full type header: report that nothing was consumed. */
	if (count < sizeof(u32))
		return 0;

	if (get_user(type, (u32 __user *)buf))
		return -EFAULT;

	/* From here on, buf/count describe only the payload. */
	count -= sizeof(u32);
	buf += sizeof(u32);

#if defined(DEBUG_TRANS) && (DEBUG_TRANS >= 2)
	if (type < STP_MAX_CMD)
		dbug_trans2("Got %s. euid=%ld, len=%d\n", _stp_command_name[min(type,STP_MAX_CMD)] ?: "?",
			    (long)euid, (int)count);
#endif

        // PR17232: preclude reentrancy during handling of messages.
        // This also permits use of static variables in the switch/case.
	might_sleep();
        mutex_lock (& cmd_mutex);
        // NB: past this point, no 'return;' - use 'goto out;'

	switch (type) {
	case STP_START:
        {
                static struct _stp_msg_start st;
                if (count < sizeof(st)) {
                        rc = 0; // ?
                        goto out;
                }
                if (copy_from_user(&st, buf, sizeof(st))) {
                        rc = -EFAULT;
                        goto out;
                }
                _stp_handle_start(&st);
        }
        break;

	case STP_EXIT:
                _stp_cleanup_and_exit(1);
		break;

	case STP_BULK:
                // NB: this signals the runtime to save separate
                // per-cpu files; our kernel->userspace files are now
                // always bulkmode (trace$N files).
#ifdef STP_BULKMODE
                // no action needed
                break;
#else
		rc = -EINVAL;
                goto out;
#endif

	case STP_RELOCATION:
		if (euid != 0) {
                        rc = -EPERM;
                        goto out;
                }
                /* This message is too large to copy here.
                   Further error checking is within the
                   function, but XXX no rc is passed back. */
          	_stp_do_relocation (buf, count);
          	break;

        case STP_TZINFO:
        {
                /* NB PR13445: We use a static struct here to contain
                   the kernel-side copy of the user-space message.
                   This should be suspicious (due to concurrency
                   concerns), but actually it's OK.  The main reason
                   is that _stp_ctl_open_cmd() enforces only a single
                   open() at a time on the .ctl file, and staprun
                   (euid=0) isn't multithreaded, and doesn't pass this
                   filehandle anywhere. */
                static struct _stp_msg_tzinfo tzi;
		if (euid != 0) {
                        rc = -EPERM;
                        goto out;
                }
                if (count < sizeof(tzi)) {
                        rc = 0;
                        goto out;
                }
                if (copy_from_user(&tzi, buf, sizeof(tzi))) {
                        rc = -EFAULT;
                        goto out;
                }
                _stp_handle_tzinfo(&tzi);
        }
        break;

        case STP_PRIVILEGE_CREDENTIALS:
        {
                /* NB PR13445: as above. */
                static struct _stp_msg_privilege_credentials pc;
		if (euid != 0) {
                        rc = -EPERM;
                        goto out;
                }
                if (count < sizeof(pc)) {
                        rc = 0;
                        goto out;
                }
                if (copy_from_user(&pc, buf, sizeof(pc))) {
                        rc = -EFAULT;
                        goto out;
                }
                _stp_handle_privilege_credentials(&pc);
        }
        break;

        case STP_REMOTE_ID:
        {
                /* NB PR13445: as above. */
                static struct _stp_msg_remote_id rem;
		if (euid != 0) {
                        rc = -EPERM;
                        goto out;
                }
                if (count < sizeof(rem)) {
                        rc = 0;
                        goto out;
                }
                if (copy_from_user(&rem, buf, sizeof(rem))) {
                        rc = -EFAULT;
                        goto out;
                }
                _stp_handle_remote_id(&rem);
        }
        break;

	/* Accepted and absorbed, but no action is taken here. */
	case STP_READY:
		break;
  /* NOTE(review): unlike the other credential-style messages above,
     this case has no euid check -- confirm that is intended. */
  case STP_NAMESPACES_PID:
    {
    static struct _stp_msg_ns_pid nspid;
                if (count < sizeof(nspid)) {
                        rc = 0;
                        goto out;
                }
                if (copy_from_user(&nspid, buf, sizeof(nspid))) {
                        rc = -EFAULT;
                        goto out;
                }

    _stp_handle_namespaces_pid(&nspid);
    }
    break;

	default:
#ifdef DEBUG_TRANS
		dbug_trans2("invalid command type %d\n", type);
#endif
		rc = -EINVAL;
                goto out;
	}

        // fall through
	rc = count + sizeof(u32); /* Pretend that we absorbed the entire message. */

out:
        mutex_unlock (& cmd_mutex);

#if defined(DEBUG_TRANS) && (DEBUG_TRANS >= 2)
	if (type < STP_MAX_CMD)
		dbug_trans2("Completed %s (rc=%d)\n",
                            _stp_command_name[min(type,STP_MAX_CMD)] ?: "?",
                            rc);
#endif
        return rc;
}

/* Wait queue used by readers and pollers of the control file; woken by
   _stp_ctl_send_notify() after a message has been queued. */
static DECLARE_WAIT_QUEUE_HEAD(_stp_ctl_wq);

#ifdef DEBUG_TRANS
/* Debug-only trace of an outgoing control message.  For the message
   types that carry text (STP_OOB_DATA, STP_SYSTEM) a copy of the
   payload, truncated to fit a 64-byte scratch buffer, is logged too. */
static void _stp_ctl_write_dbug(int type, void *data, int len)
{
	char preview[64];

	/* Text-carrying messages: log a bounded copy of the payload. */
	if (type == STP_OOB_DATA || type == STP_SYSTEM) {
		snprintf(preview, sizeof(preview), "%s", (char *)data);
		if (type == STP_OOB_DATA) {
			dbug_trans2("sending %d bytes of STP_OOB_DATA: %s\n", len,
				    preview);
		} else {
			dbug_trans2("sending STP_SYSTEM: %s\n", preview);
		}
		return;
	}

	/* Everything else only needs its name (and sometimes length). */
	switch (type) {
	case STP_START:
		dbug_trans2("sending STP_START\n");
		return;
	case STP_EXIT:
		dbug_trans2("sending STP_EXIT\n");
		return;
	case STP_TRANSPORT:
		dbug_trans2("sending STP_TRANSPORT\n");
		return;
	case STP_CONNECT:
		dbug_trans2("sending STP_CONNECT\n");
		return;
	case STP_DISCONNECT:
		dbug_trans2("sending STP_DISCONNECT\n");
		return;
	case STP_BULK:
		dbug_trans2("sending STP_BULK\n");
		return;
	case STP_REALTIME_DATA:
		dbug_trans2("sending %d bytes of STP_REALTIME_DATA\n", len);
		return;
	case STP_REQUEST_EXIT:
		dbug_trans2("sending STP_REQUEST_EXIT\n");
		return;
	case STP_READY:
	case STP_RELOCATION:
	case STP_BUF_INFO:
	case STP_SUBBUFS_CONSUMED:
		dbug_trans2("sending old message\n");
		return;
	default:
		dbug_trans2("ERROR: unknown message type: %d\n", type);
		return;
	}
}
#endif

/* Marker to show a "special" message buffer isn't being used.
   Will be put in the _stp_buffer type field.  The type field should
   only be manipulated while holding the _stp_ctl_special_msg_lock.  */
#define _STP_CTL_MSG_UNUSED STP_MAX_CMD

/* cmd messages allocated ahead of time.  There can be only one
   outstanding message of each of these types at any moment.  */
static struct _stp_buffer *_stp_ctl_start_msg;
static struct _stp_buffer *_stp_ctl_exit_msg;
static struct _stp_buffer *_stp_ctl_transport_msg;
static struct _stp_buffer *_stp_ctl_request_exit_msg;

/* generic overflow messages allocated ahead of time; they carry a
   fixed text and stand in for a real message when no dynamic buffer
   is available.  */
static struct _stp_buffer *_stp_ctl_oob_warn;
static struct _stp_buffer *_stp_ctl_oob_err;
static struct _stp_buffer *_stp_ctl_system_warn;
static struct _stp_buffer *_stp_ctl_realtime_err;

/* Set aside buffers for all "special" message types, plus generic
   warning and error messages.  */
static int _stp_ctl_alloc_special_buffers(void)
{
	size_t len;
	const char *msg;

	/* There can be only one of start, exit, transport and request.  */
	_stp_ctl_start_msg = _stp_mempool_alloc(_stp_pool_q);
	if (_stp_ctl_start_msg == NULL)
		return -1;
	_stp_ctl_start_msg->type = _STP_CTL_MSG_UNUSED;

	_stp_ctl_exit_msg = _stp_mempool_alloc(_stp_pool_q);
	if (_stp_ctl_exit_msg == NULL)
		return -1;
	_stp_ctl_exit_msg->type = _STP_CTL_MSG_UNUSED;

	_stp_ctl_transport_msg = _stp_mempool_alloc(_stp_pool_q);
	if (_stp_ctl_transport_msg == NULL)
		return -1;
	_stp_ctl_transport_msg->type = _STP_CTL_MSG_UNUSED;

	_stp_ctl_request_exit_msg = _stp_mempool_alloc(_stp_pool_q);
	if (_stp_ctl_request_exit_msg == NULL)
		return -1;
	_stp_ctl_request_exit_msg->type = _STP_CTL_MSG_UNUSED;

	/* oob_warn, oob_err, system and realtime are dynamically
	   allocated and a special static warn/err message take their
	   place if we run out of memory before delivery.  */
	_stp_ctl_oob_warn = _stp_mempool_alloc(_stp_pool_q);
	if (_stp_ctl_oob_warn == NULL)
		return -1;
	_stp_ctl_oob_warn->type = _STP_CTL_MSG_UNUSED;
	/* Note that the following message shouldn't be translated,
	 * since "WARNING:" is part of the module cmd protocol. */
	msg = "WARNING: too many pending (warning) messages\n";
	len = strlen(msg) + 1;
	_stp_ctl_oob_warn->len = len;
	memcpy(&_stp_ctl_oob_warn->buf, msg, len);

	_stp_ctl_oob_err = _stp_mempool_alloc(_stp_pool_q);
	if (_stp_ctl_oob_err == NULL)
		return -1;
	_stp_ctl_oob_err->type = _STP_CTL_MSG_UNUSED;
	/* Note that the following message shouldn't be translated,
	 * since "ERROR:" is part of the module cmd protocol. */
	msg = "ERROR: too many pending (error) messages\n";
	len = strlen(msg) + 1;
	_stp_ctl_oob_err->len = len;
	memcpy(&_stp_ctl_oob_err->buf, msg, len);

	_stp_ctl_system_warn = _stp_mempool_alloc(_stp_pool_q);
	if (_stp_ctl_system_warn == NULL)
		return -1;
	_stp_ctl_system_warn->type = _STP_CTL_MSG_UNUSED;
	/* Note that the following message shouldn't be translated,
	 * since "WARNING:" is part of the module cmd protocol. */
	msg = "WARNING: too many pending (system) messages\n";
	len = strlen(msg) + 1;
	_stp_ctl_system_warn->len = len;
	memcpy(&_stp_ctl_system_warn->buf, msg, len);

	_stp_ctl_realtime_err = _stp_mempool_alloc(_stp_pool_q);
	if (_stp_ctl_realtime_err == NULL)
		return -1;
	_stp_ctl_realtime_err->type = _STP_CTL_MSG_UNUSED;
	/* Note that the following message shouldn't be translated,
	 * since "ERROR:" is part of the module cmd protocol. */
	msg = "ERROR: too many pending (realtime) messages\n";
	len = strlen(msg) + 1;
	_stp_ctl_realtime_err->len = len;
	memcpy(&_stp_ctl_realtime_err->buf, msg, len);

	return 0;
}


/* Free the buffers for all "special" message types, plus generic
   warning and error messages.  */
static void _stp_ctl_free_special_buffers(void)
{
	if (_stp_ctl_start_msg != NULL) {
		_stp_mempool_free(_stp_ctl_start_msg);
		_stp_ctl_start_msg = NULL;
	}

	if (_stp_ctl_exit_msg != NULL) {
		_stp_mempool_free(_stp_ctl_exit_msg);
		_stp_ctl_exit_msg = NULL;
	}

	if (_stp_ctl_transport_msg != NULL) {
		_stp_mempool_free(_stp_ctl_transport_msg);
		_stp_ctl_transport_msg = NULL;
	}

	if (_stp_ctl_request_exit_msg != NULL) {
		_stp_mempool_free(_stp_ctl_request_exit_msg);
		_stp_ctl_request_exit_msg = NULL;
	}

	if (_stp_ctl_oob_warn != NULL) {
		_stp_mempool_free(_stp_ctl_oob_warn);
		_stp_ctl_oob_warn = NULL;
	}

	if (_stp_ctl_oob_err != NULL) {
		_stp_mempool_free(_stp_ctl_oob_err);
		_stp_ctl_oob_err = NULL;
	}

	if (_stp_ctl_system_warn != NULL) {
		_stp_mempool_free(_stp_ctl_system_warn);
		_stp_ctl_system_warn = NULL;
	}

	if (_stp_ctl_realtime_err != NULL) {
		_stp_mempool_free(_stp_ctl_realtime_err);
		_stp_ctl_realtime_err = NULL;
	}
}


/* Get a buffer based on type, possibly a generic buffer, when all else
   fails returns NULL and there is nothing we can do.  */
static struct _stp_buffer *_stp_ctl_get_buffer(int type, const char *data,
					       unsigned len)
{
	unsigned long flags;
	struct _stp_buffer *bptr = NULL;

	/* Is it a dynamically allocated message type? */
	if (type == STP_OOB_DATA
	    || type == STP_SYSTEM
	    || type == STP_REALTIME_DATA)
		bptr = _stp_mempool_alloc(_stp_pool_q);

	if (bptr != NULL) {
		bptr->type = type;
		memcpy(bptr->buf, data, len);
		bptr->len = len;
	} else {
		/* "special" type, or no more dynamic buffers.
		   We must be careful to lock to avoid races between
		   marking as used/free.  There can be only one.  */
		switch (type) {
		case STP_START:
			bptr = _stp_ctl_start_msg;
			break;
		case STP_EXIT:
			bptr = _stp_ctl_exit_msg;
			break;
    case STP_NAMESPACES_PID:
      break;
		case STP_TRANSPORT:
			bptr = _stp_ctl_transport_msg;
			break;
		case STP_REQUEST_EXIT:
			bptr = _stp_ctl_request_exit_msg;
			break;
		case STP_OOB_DATA:
			/* Note that "WARNING:" should not be
			 * translated, since it is part of the module
			 * cmd protocol. */
			if (data && len >= 9
			    && strncmp(data, "WARNING: ", 9) == 0)
				bptr = _stp_ctl_oob_warn;
			/* Note that "ERROR:" should not be
			 * translated, since it is part of the module
			 * cmd protocol. */
			else if (data && len >= 7
				 && strncmp(data, "ERROR: ", 7) == 0)
				bptr = _stp_ctl_oob_err;
			else
				printk(KERN_WARNING "_stp_ctl_get_buffer unexpected STP_OOB_DATA\n");
			break;
		case STP_SYSTEM:
			bptr = _stp_ctl_system_warn;
			type = STP_OOB_DATA; /* overflow message */
			break;
		case STP_REALTIME_DATA:
			bptr = _stp_ctl_realtime_err;
			type = STP_OOB_DATA; /* overflow message */
			break;
		default:
			printk(KERN_WARNING "_stp_ctl_get_buffer unknown type: %d\n", type);
			bptr = NULL;
			break;
		}
		if (bptr != NULL) {
			/* OK, it is a special one, but is it free?  */
			stp_spin_lock_irqsave(&_stp_ctl_special_msg_lock, flags);
			if (bptr->type == _STP_CTL_MSG_UNUSED)
				bptr->type = type;
			else
				bptr = NULL;
			stp_spin_unlock_irqrestore(&_stp_ctl_special_msg_lock, flags);
		}

		/* Got a special message buffer, with type set, fill it in,
		   unless it is an "overflow" message.  */
		if (bptr != NULL
		    && bptr != _stp_ctl_oob_warn
		    && bptr != _stp_ctl_oob_err
		    && bptr != _stp_ctl_system_warn
		    && bptr != _stp_ctl_realtime_err) {
			memcpy(bptr->buf, data, len);
			bptr->len = len;
		}
	}
	return bptr;
}

/* Release a message buffer.  A dynamically allocated buffer goes back
   to the mempool; one of the pre-reserved buffers is merely re-marked
   as unused (under _stp_ctl_special_msg_lock) so it can be claimed
   again.  */
static void _stp_ctl_free_buffer(struct _stp_buffer *bptr)
{
	unsigned long irqflags;
	const int reserved = (bptr == _stp_ctl_start_msg
			      || bptr == _stp_ctl_exit_msg
			      || bptr == _stp_ctl_transport_msg
			      || bptr == _stp_ctl_request_exit_msg
			      || bptr == _stp_ctl_oob_warn
			      || bptr == _stp_ctl_oob_err
			      || bptr == _stp_ctl_system_warn
			      || bptr == _stp_ctl_realtime_err);

	if (!reserved) {
		_stp_mempool_free(bptr);
		return;
	}

	/* Reserved buffers are never freed here, only recycled. */
	stp_spin_lock_irqsave(&_stp_ctl_special_msg_lock, irqflags);
	bptr->type = _STP_CTL_MSG_UNUSED;
	stp_spin_unlock_irqrestore(&_stp_ctl_special_msg_lock, irqflags);
}

/* Queue a message on _stp_ctl_ready_q.  Safe to call from a probe
   context: it does not call wake_up on _stp_ctl_wq (which would not be
   safe from every probe context); a timer will come by later and
   notify any readers.

   Returns the number of bytes queued/sent (payload plus type header),
   0 when the message is too large for a control buffer, or -ENOMEM
   when no buffer is available.  */
static int _stp_ctl_send(int type, void *data, unsigned len)
{
	struct context* __restrict__ ctx = NULL;
	struct _stp_buffer *msg;
	unsigned long irqflags;
	unsigned handled;
	int rc;

#ifdef DEBUG_TRANS
	_stp_ctl_write_dbug(type, data, len);
#endif

	/* Let the fs layer handle the message itself if it wants to,
	   e.g. by merging it into a previous buffer that still has room
	   (transport version 1 procfs does this).  */
	handled = _stp_ctl_write_fs(type, data, len);
	if (handled > 0)
		return handled;

	/* Refuse anything that would overflow a control buffer. */
	if (unlikely(len > STP_CTL_BUFFER_SIZE)) {
                _stp_warn("runtime control message type=%d len=%d too large\n", type, len);
		return 0;
	}

	/* Prevent probe reentrancy while grabbing probe-used locks.
	   Since this function may run in arbitrary probe context, every
	   lock it takes must never be held outside a context:
	    * _stp_ctl_ready_lock
	    * _stp_pool_q->lock
	    * _stp_ctl_special_msg_lock
	   Holding the context here (and everywhere else those locks are
	   used) makes such a probe look reentrant, so it is skipped
	   instead of deadlocking.  */
	ctx = _stp_runtime_entryfn_get_context();

	msg = _stp_ctl_get_buffer(type, data, len);
	if (unlikely(msg == NULL)) {
		/* Nothing else we can do... and deliberately no printk,
		   to avoid spamming the kernel log. */
		rc = -ENOMEM;
	} else {
		/* Append to the ready queue.  A probe may fire
		   recursively here (kprobe in NMI, lock tracepoints),
		   but it is squashed because the context is busy.  */
		stp_spin_lock_irqsave(&_stp_ctl_ready_lock, irqflags);
		list_add_tail(&msg->list, &_stp_ctl_ready_q);
		stp_spin_unlock_irqrestore(&_stp_ctl_ready_lock, irqflags);
		rc = len + sizeof(msg->type);
	}

	_stp_runtime_entryfn_put_context(ctx);

	/* Speeding up the notification timer here would be nice, but
	   calling mod_timer() would bring in more locking issues... */
	return rc;
}

/* Logs a warning or error through the control channel. This function mimics
   _stp_ctl_send() but directly uses an _stp_buffer to construct the warning or
   error message. This is *only* for warnings and errors. The logtype string
   should be either "WARNING: " or "ERROR: ", and logtype_len shouldn't include
   a trailing NUL termination byte. The message type is always assumed to be
   STP_OOB_DATA since this is only for warnings and errors. */
static void _stp_ctl_log_werr(const char *logtype, size_t logtype_len,
			      const char *fmt, va_list args)
{
	struct context *__restrict__ c;
	struct _stp_buffer *bptr;
	unsigned long flags;

	c = _stp_runtime_entryfn_get_context();
	bptr = _stp_ctl_get_buffer(STP_OOB_DATA, logtype, logtype_len);
	if (!bptr)
		goto put_context;

	/*
	 * This is a generic failure message for when there's no space left. We
	 * aren't allowed to change it, so just go straight to sending it off.
	 */
	if (bptr == _stp_ctl_oob_warn || bptr == _stp_ctl_oob_err)
		goto send_msg;

	/*
	 * The logtype string was already copied in by _stp_ctl_get_buffer(),
	 * now copy the rest of the message. The trailing NUL termination byte
	 * automatically added by vscnprintf() is unneeded, so it's ignored.
	 */
	bptr->len += vscnprintf(bptr->buf + logtype_len,
				STP_CTL_BUFFER_SIZE - logtype_len, fmt, args);

	/*
	 * Make sure the last character is a newline. There will always be
	 * enough space to do this because vscnprintf() reserves a byte for the
	 * trailing NUL character which we don't care about.
	 */
	if (bptr->buf[bptr->len - 1] != '\n')
		bptr->buf[bptr->len++] = '\n';

send_msg:
	stp_spin_lock_irqsave(&_stp_ctl_ready_lock, flags);
	list_add_tail(&bptr->list, &_stp_ctl_ready_q);
	stp_spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags);
put_context:
	_stp_runtime_entryfn_put_context(c);
}

/* _stp_ctl_send() followed by an immediate wake_up on _stp_ctl_wq so
   listeners are notified right away.  DO NOT CALL THIS FROM A (KERNEL)
   PROBE CONTEXT: it is only safe from the transport layer itself, in
   user context.  Anything reachable from a probe must use
   _stp_ctl_send() instead.  Returns whatever _stp_ctl_send() returned. */
static int _stp_ctl_send_notify(int type, void *data, unsigned len)
{
	int queued;

	dbug_trans(1, "_stp_ctl_send_notify: type=%d len=%d\n", type, len);

	queued = _stp_ctl_send(type, data, len);
	if (queued <= 0)
		return queued;

	/* Something was queued: wake every _stp_ctl_wq listener so
	   stapio can pick it up asap.  */
	wake_up_interruptible(&_stp_ctl_wq);
	return queued;
}

/** Called when someone tries to read from our .cmd file.
    Takes _stp_ctl_ready_lock and picks the next _stp_buffer off
    _stp_ctl_ready_q, sleeping on _stp_ctl_wq while the queue is empty
    (unless the file is O_NONBLOCK).

    The message is copied out as its type field followed by `len` bytes
    of payload.

    Returns the number of bytes copied to user space, -EAGAIN when
    non-blocking and nothing is queued, -ERESTARTSYS when the wait is
    interrupted, or -EFAULT when the user buffer is too small or the
    copy faults.  In the -EFAULT case the message is dropped, but its
    buffer is still released so it is neither leaked nor (for a
    pre-reserved buffer) left marked in-use forever.  */
static ssize_t _stp_ctl_read_cmd(struct file *file, char __user *buf,
				 size_t count, loff_t *ppos)
{
	struct context* __restrict__ c = NULL;
	struct _stp_buffer *bptr;
	int len;
	int failed;
	unsigned long flags;

	/* Prevent probe reentrancy while grabbing probe-used locks.  */
	c = _stp_runtime_entryfn_get_context();

	/* wait for nonempty ready queue */
	stp_spin_lock_irqsave(&_stp_ctl_ready_lock, flags);
	while (list_empty(&_stp_ctl_ready_q)) {
		/* Drop lock and context before sleeping or returning. */
		stp_spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags);
		_stp_runtime_entryfn_put_context(c);
		if (file->f_flags & O_NONBLOCK)
			return -EAGAIN;
		if (wait_event_interruptible(_stp_ctl_wq, !list_empty(&_stp_ctl_ready_q)))
			return -ERESTARTSYS;
		/* Re-check under the lock: another reader may have won. */
		c = _stp_runtime_entryfn_get_context();
		stp_spin_lock_irqsave(&_stp_ctl_ready_lock, flags);
	}

	/* get the next buffer off the ready list */
	bptr = (struct _stp_buffer *)_stp_ctl_ready_q.next;
	list_del_init(&bptr->list);
	stp_spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags);

	/* NB: we can't hold the context across copy_to_user, as it might fault.  */
	_stp_runtime_entryfn_put_context(c);

	/* write it out: type header plus payload */
	len = bptr->len + 4;
	failed = (len > count || copy_to_user(buf, &bptr->type, len));

	/* Whether or not the copy worked, we are done with the buffer:
	   it is already off the queue and cannot be put back without
	   risking out-of-order delivery.  Return it to the pool (or mark
	   the special buffer unused) on both paths.  */
	c = _stp_runtime_entryfn_get_context();
	_stp_ctl_free_buffer(bptr);
	_stp_runtime_entryfn_put_context(c);

	if (failed) {
		/* The message is lost.  Fortunately, this should never
		 * happen.
		 *
		 * FIXME: need to mark this as a transport failure. */
		errk("Supplied buffer too small. count:%d len:%d\n", (int)count, len);
		return -EFAULT;
	}

	return len;
}

/* Open handler for the control file.  Only one opener is allowed at a
   time: a second concurrent open fails with -EBUSY.  On success the
   file's f_op is swapped for a private copy that has the module owner
   set, an extra module reference is taken, and _stp_attach() is called.
   Returns 0 on success.  */
static int _stp_ctl_open_cmd(struct inode *inode, struct file *file)
{
	/* Private copy of the file's operations, with .owner filled in;
	   initialized once, on the first successful open. */
	static struct file_operations _stp_ctl_fops;

	/* Claim the single attachment slot, backing out if it was taken. */
	if (atomic_inc_return (&_stp_ctl_attached) > 1) {
                atomic_dec (&_stp_ctl_attached);
		return -EBUSY;
        }

	/*
	 * Replace the file's f_op with our own which has the module owner set.
	 * This is needed because, in do_select(), the only thing that can stop
	 * this module from disappearing while data from our procfs file is in
	 * use is the module reference counter. So we need to set the module
	 * owner pointer and then add a reference to our module, since the
	 * reference addition from the open() has already been skipped by the
	 * time this code is reached. The data which can be used after the
	 * module is freed is `&_stp_ctl_wq`, which is stored and later
	 * dereferenced in do_select(). This pointer is passed to do_select()
	 * from the poll_wait() in _stp_ctl_poll_cmd(), which stores it in
	 * `entry->wait_address`. The reason this use-after-free problem exists
	 * is because procfs doesn't allow for passing in a module owner: all
	 * procfs files use an internal `struct file_operations` located in
	 * fs/proc/inode.c. So we patch in a module owner the hard way. No
	 * locking is needed here due to the `_stp_ctl_attached` guard above.
	 * Note that `_stp_ctl_fops` can only be initialized once; initializing
	 * it more than once could cause a bad race because _stp_ctl_close_cmd()
	 * is called *before* the final `file->f_op` usage, meaning that the
	 * `_stp_ctl_attached` guard won't stop us from mangling `_stp_ctl_fops`
	 * while it's in use for closing an old control channel fd.
	 */
	if (_stp_ctl_fops.owner != THIS_MODULE) {
		_stp_ctl_fops = *file->f_op;
		_stp_ctl_fops.owner = THIS_MODULE;
	}
	__module_get(THIS_MODULE);
	file->f_op = &_stp_ctl_fops;

	_stp_attach();
	return 0;
}

static int _stp_ctl_close_cmd(struct inode *inode, struct file *file)
{
        if (atomic_dec_return (&_stp_ctl_attached) > 0) {
                BUG();
                return -EINVAL;
        }
        _stp_detach();
	return 0;
}

/* Poll handler for the control file.  Writability and priority data
   are always reported; readability is reported only while messages
   are queued on _stp_ctl_ready_q.  */
static unsigned _stp_ctl_poll_cmd(struct file *file, poll_table *wait)
{
	/* Always claim "writable" and "priority data available": that
	   makes select report an exception condition on the file,
	   which stapio uses to find out whether select works at all. */
	unsigned mask = POLLPRI | POLLOUT | POLLWRNORM;
	unsigned long irqflags;
	int pending;

	poll_wait(file, &_stp_ctl_wq, wait);

	/* Queued messages mean there is data available to read. */
	stp_spin_lock_irqsave(&_stp_ctl_ready_lock, irqflags);
	pending = !list_empty(&_stp_ctl_ready_q);
	stp_spin_unlock_irqrestore(&_stp_ctl_ready_lock, irqflags);

	if (pending)
		mask |= POLLIN | POLLRDNORM;

	return mask;
}


/* Operation table wiring the control file's read/write/open/release/poll
   entry points to the handlers in this file. */
static struct file_operations _stp_ctl_fops_cmd = {
	.owner = THIS_MODULE,
	.read = _stp_ctl_read_cmd,
	.write = _stp_ctl_write_cmd,
	.open = _stp_ctl_open_cmd,
	.release = _stp_ctl_close_cmd,
	.poll = _stp_ctl_poll_cmd
};
#ifdef STAPCONF_PROC_OPS
/* Same handlers expressed as a struct proc_ops, built only when
   STAPCONF_PROC_OPS is defined.  Note there is no owner field set here.
   NOTE(review): which of the two tables is actually registered is
   decided outside this file section -- confirm against the fs backend. */
static struct proc_ops _stp_ctl_proc_ops_cmd = {
	.proc_read = _stp_ctl_read_cmd,
	.proc_write = _stp_ctl_write_cmd,
	.proc_open = _stp_ctl_open_cmd,
	.proc_release = _stp_ctl_close_cmd,
	.proc_poll = _stp_ctl_poll_cmd
};
#endif

/* Set up the control channel: initialize the ready queue, create the
   buffer pool, reserve the special buffers and register the control
   file with the filesystem backend.

   Returns 0 on success, -1 on any failure.  On failure everything
   acquired so far is released in the same order that
   _stp_unregister_ctl_channel() uses (special buffers first, then the
   pool), and the pool pointer is cleared so no stale pointers remain.  */
static int _stp_register_ctl_channel(void)
{
	INIT_LIST_HEAD(&_stp_ctl_ready_q);

	/* allocate buffers */
	_stp_pool_q = _stp_mempool_init(sizeof(struct _stp_buffer),
					STP_DEFAULT_BUFFERS);
	if (unlikely(_stp_pool_q == NULL))
		goto err0;
	_stp_allocated_net_memory += sizeof(struct _stp_buffer) * STP_DEFAULT_BUFFERS;

	if (unlikely(_stp_ctl_alloc_special_buffers() != 0))
		goto err0;

	if (_stp_register_ctl_channel_fs() != 0) // procfs or debugfs decision time
		goto err0;

	return 0;

err0:
	/* Hand back any special buffers that were reserved before the
	   failure; this skips NULL pointers, so it is safe however far
	   we got.  Must happen before the pool itself goes away.  */
	_stp_ctl_free_special_buffers();
	_stp_mempool_destroy(_stp_pool_q);
	_stp_pool_q = NULL;
	errk("Error creating systemtap control channel.\n");
	return -1;
}

static void _stp_unregister_ctl_channel(void)
{
	struct _stp_buffer *bptr, *tmp;

	_stp_unregister_ctl_channel_fs();

	/* Return memory to pool and free it. */
	list_for_each_entry_safe(bptr, tmp, &_stp_ctl_ready_q, list) {
		list_del(&bptr->list);
		_stp_ctl_free_buffer(bptr);
	}
	_stp_ctl_free_special_buffers();
	_stp_mempool_destroy(_stp_pool_q);
}
Sindbad File Manager Version 1.0, Coded By Sindbad EG ~ The Terrorists