// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Function graph tracer.
 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
 * Mostly borrowed from function tracer which
 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
 *
 */
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/fs.h>

#include "trace.h"
#include "trace_output.h"

static bool kill_ftrace_graph;

/**
 * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
 *
 * ftrace_graph_stop() is called when a severe error is detected in
 * the function graph tracing. This function is called by the critical
 * paths of function graph to keep those paths from doing any more harm.
 */
bool ftrace_graph_is_dead(void)
{
	return kill_ftrace_graph;
}

/**
 * ftrace_graph_stop - set to permanently disable function graph tracing
 *
 * In case of an error in function graph tracing, this is called
 * to try to keep function graph tracing from causing any more harm.
 * Usually this is pretty severe and this is called to try to at least
 * get a warning out to the user.
 */
void ftrace_graph_stop(void)
{
	kill_ftrace_graph = true;
}

/* When set, irq functions will be ignored */
static int ftrace_graph_skip_irqs;

struct fgraph_cpu_data {
	pid_t		last_pid;
	int		depth;
	int		depth_irq;
	int		ignore;
	unsigned long	enter_funcs[FTRACE_RETFUNC_DEPTH];
};

struct fgraph_data {
	struct fgraph_cpu_data __percpu *cpu_data;

	/* Place to preserve last processed entry. */
	struct ftrace_graph_ent_entry	ent;
	struct ftrace_graph_ret_entry	ret;
	int				failed;
	int				cpu;
};

#define TRACE_GRAPH_INDENT	2

unsigned int fgraph_max_depth;

static struct tracer_opt trace_opts[] = {
	/* Display overruns? (for self-debug purpose) */
	{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
	/* Display CPU ? */
	{ TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
	/* Display Overhead ? */
	{ TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
	/* Display proc name/pid */
	{ TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
	/* Display duration of execution */
	{ TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
	/* Display absolute time of an entry */
	{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
	/* Display interrupts */
	{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
	/* Display function name after trailing } */
	{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
	/* Include sleep time (scheduled out) between entry and return */
	{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
	/* Include time within nested functions */
	{ TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) },
	{ } /* Empty entry */
};

static struct tracer_flags tracer_flags = {
	/* Don't display overruns, proc, or tail by default */
	.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
	       TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS |
	       TRACE_GRAPH_SLEEP_TIME | TRACE_GRAPH_GRAPH_TIME,
	.opts = trace_opts
};

static struct trace_array *graph_array;

/*
 * The DURATION column is also used to display IRQ signs;
 * the following values are used by print_graph_irq and others
 * to fill in space in the DURATION column.
 */
enum {
	FLAGS_FILL_FULL  = 1 << TRACE_GRAPH_PRINT_FILL_SHIFT,
	FLAGS_FILL_START = 2 << TRACE_GRAPH_PRINT_FILL_SHIFT,
	FLAGS_FILL_END   = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT,
};

static void
print_graph_duration(struct trace_array *tr, unsigned long long duration,
		     struct trace_seq *s, u32 flags);

/* Add a function return address to the trace stack on thread info.*/
static int
ftrace_push_return_trace(unsigned long ret, unsigned long func,
			 unsigned long frame_pointer, unsigned long *retp)
{
	unsigned long long calltime;
	int index;

	if (unlikely(ftrace_graph_is_dead()))
		return -EBUSY;

	if (!current->ret_stack)
		return -EBUSY;

	/*
	 * We must make sure the ret_stack is tested before we read
	 * anything else.
	 */
	smp_rmb();

	/* The return trace stack is full */
	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
		atomic_inc(&current->trace_overrun);
		return -EBUSY;
	}

	/*
	 * The curr_ret_stack is an index to ftrace return stack of
	 * current task.  Its value should be in [0, FTRACE_RETFUNC_
	 * DEPTH) when the function graph tracer is used.  To support
	 * filtering out specific functions, it makes the index
	 * negative by subtracting huge value (FTRACE_NOTRACE_DEPTH)
	 * so when it sees a negative index the ftrace will ignore
	 * the record.  And the index gets recovered when returning
	 * from the filtered function by adding the FTRACE_NOTRACE_
	 * DEPTH and then it'll continue to record functions normally.
	 *
	 * The curr_ret_stack is initialized to -1 and get increased
	 * in this function.  So it can be less than -1 only if it was
	 * filtered out via ftrace_graph_notrace_addr() which can be
	 * set from set_graph_notrace file in tracefs by user.
	 */
	if (current->curr_ret_stack < -1)
		return -EBUSY;

	calltime = trace_clock_local();

	index = ++current->curr_ret_stack;
	if (ftrace_graph_notrace_addr(func))
		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
	barrier();
	current->ret_stack[index].ret = ret;
	current->ret_stack[index].func = func;
	current->ret_stack[index].calltime = calltime;
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
	current->ret_stack[index].fp = frame_pointer;
#endif
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
	current->ret_stack[index].retp = retp;
#endif
	return 0;
}

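/*
 * Common entry point, called from the architecture's function graph
 * hooks when a traced function is entered: push the return address on
 * the ret_stack and hand the entry off to the registered tracer.
 */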
int function_graph_enter(unsigned long ret, unsigned long func,
			 unsigned long frame_pointer, unsigned long *retp)
{
	struct ftrace_graph_ent trace;

	trace.func = func;
	trace.depth = ++current->curr_ret_depth;

	if (ftrace_push_return_trace(ret, func,
				     frame_pointer, retp))
		goto out;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace))
		goto out_ret;

	return 0;
 out_ret:
	current->curr_ret_stack--;
 out:
	current->curr_ret_depth--;
	return -EBUSY;
}

/* Retrieve a function return address from the trace stack on thread info. */
static void
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
			unsigned long frame_pointer)
{
	int index;

	index = current->curr_ret_stack;

	/*
	 * A negative index here means that it's just returned from a
	 * notrace'd function.  Recover index to get an original
	 * return address.  See ftrace_push_return_trace().
	 *
	 * TODO: Need to check whether the stack gets corrupted.
	 */
	if (index < 0)
		index += FTRACE_NOTRACE_DEPTH;

	if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
		ftrace_graph_stop();
		WARN_ON(1);
		/* Might as well panic, otherwise we have nowhere to go */
		*ret = (unsigned long)panic;
		return;
	}

#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
	/*
	 * The arch may choose to record the frame pointer used
	 * and check it here to make sure that it is what we expect it
	 * to be. If gcc does not set the place holder of the return
	 * address in the frame pointer, and does a copy instead, then
	 * the function graph trace will fail. This test detects this
	 * case.
	 *
	 * Currently, x86_32 with optimize for size (-Os) makes the latest
	 * gcc do the above.
	 *
	 * Note, -mfentry does not use frame pointers, and this test
	 *  is not needed if CC_USING_FENTRY is set.
	 */
	if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
		ftrace_graph_stop();
		WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
		     "  from func %ps return to %lx\n",
		     current->ret_stack[index].fp,
		     frame_pointer,
		     (void *)current->ret_stack[index].func,
		     current->ret_stack[index].ret);
		*ret = (unsigned long)panic;
		return;
	}
#endif

	*ret = current->ret_stack[index].ret;
	trace->func = current->ret_stack[index].func;
	trace->calltime = current->ret_stack[index].calltime;
	trace->overrun = atomic_read(&current->trace_overrun);
	trace->depth = current->curr_ret_depth--;
	/*
	 * We still want to trace interrupts coming in if
	 * max_depth is set to 1. Make sure the decrement is
	 * seen before ftrace_graph_return.
	 */
	barrier();
}

/*
 * Send the trace to the ring-buffer.
 * @return the original return address.
 */
unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
{
	struct ftrace_graph_ret trace;
	unsigned long ret;

	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
	trace.rettime = trace_clock_local();
	ftrace_graph_return(&trace);
	/*
	 * The ftrace_graph_return() may still access the current
	 * ret_stack structure, we need to make sure the update of
	 * curr_ret_stack is after that.
	 */
	barrier();
	current->curr_ret_stack--;
	/*
	 * The curr_ret_stack can be less than -1 only if it was
	 * filtered out and it's about to return from the function.
	 * Recover the index and continue to trace normal functions.
	 */
	if (current->curr_ret_stack < -1) {
		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
		return ret;
	}

	if (unlikely(!ret)) {
		ftrace_graph_stop();
		WARN_ON(1);
		/* Might as well panic. What else to do? */
		ret = (unsigned long)panic;
	}

	return ret;
}

/**
 * ftrace_graph_ret_addr - convert a potentially modified stack return address
 *			   to its original value
 *
 * This function can be called by stack unwinding code to convert a found stack
 * return address ('ret') to its original value, in case the function graph
 * tracer has modified it to be 'return_to_handler'.  If the address hasn't
 * been modified, the unchanged value of 'ret' is returned.
 *
 * 'idx' is a state variable which should be initialized by the caller to zero
 * before the first call.
 *
 * 'retp' is a pointer to the return address on the stack.  It's ignored if
 * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
 */
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
				    unsigned long ret, unsigned long *retp)
{
	int index = task->curr_ret_stack;
	int i;

	if (ret != (unsigned long)return_to_handler)
		return ret;

	if (index < -1)
		index += FTRACE_NOTRACE_DEPTH;

	if (index < 0)
		return ret;

	for (i = 0; i <= index; i++)
		if (task->ret_stack[i].retp == retp)
			return task->ret_stack[i].ret;

	return ret;
}
#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
				    unsigned long ret, unsigned long *retp)
{
	int task_idx;

	if (ret != (unsigned long)return_to_handler)
		return ret;

	task_idx = task->curr_ret_stack;

	if (!task->ret_stack || task_idx < *idx)
		return ret;

	task_idx -= *idx;
	(*idx)++;

	return task->ret_stack[task_idx].ret;
}
#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */

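/* Write a function entry event into the ring buffer. */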
int __trace_graph_entry(struct trace_array *tr,
				struct ftrace_graph_ent *trace,
				unsigned long flags,
				int pc)
{
	struct trace_event_call *call = &event_funcgraph_entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
	struct ftrace_graph_ent_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
					  sizeof(*entry), flags, pc);
	if (!event)
		return 0;
	entry	= ring_buffer_event_data(event);
	entry->graph_ent			= *trace;
	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);

	return 1;
}

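/*
 * Return true if the current function should be skipped because it runs
 * in hard interrupt context and irq functions are being ignored.
 */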
static inline int ftrace_graph_ignore_irqs(void)
{
	if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
		return 0;

	return in_irq();
}

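/*
 * Entry handler registered with register_ftrace_graph(): records a
 * function entry event unless it is filtered out.
 */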
int trace_graph_entry(struct ftrace_graph_ent *trace)
{
	struct trace_array *tr = graph_array;
	struct trace_array_cpu *data;
	unsigned long flags;
	long disabled;
	int ret;
	int cpu;
	int pc;

	if (!ftrace_trace_task(tr))
		return 0;

	if (ftrace_graph_ignore_func(trace))
		return 0;

	if (ftrace_graph_ignore_irqs())
		return 0;

	/*
	 * Do not trace a function if it's filtered by set_graph_notrace.
	 * Make the index of ret stack negative to indicate that it should
	 * ignore further functions.  But it needs its own ret stack entry
	 * to recover the original index in order to continue tracing after
	 * returning from the function.
	 */
	if (ftrace_graph_notrace_addr(trace->func))
		return 1;

	/*
	 * Stop here if tracing_threshold is set. We only write function return
	 * events to the ring buffer.
	 */
	if (tracing_thresh)
		return 1;

	local_irq_save(flags);
	cpu = raw_smp_processor_id();
	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1)) {
		pc = preempt_count();
		ret = __trace_graph_entry(tr, trace, flags, pc);
	} else {
		ret = 0;
	}

	atomic_dec(&data->disabled);
	local_irq_restore(flags);

	return ret;
}

static void
__trace_graph_function(struct trace_array *tr,
		unsigned long ip, unsigned long flags, int pc)
{
	u64 time = trace_clock_local();
	struct ftrace_graph_ent ent = {
		.func  = ip,
		.depth = 0,
	};
	struct ftrace_graph_ret ret = {
		.func     = ip,
		.depth    = 0,
		.calltime = time,
		.rettime  = time,
	};

	__trace_graph_entry(tr, &ent, flags, pc);
	__trace_graph_return(tr, &ret, flags, pc);
}

void
trace_graph_function(struct trace_array *tr,
		unsigned long ip, unsigned long parent_ip,
		unsigned long flags, int pc)
{
	__trace_graph_function(tr, ip, flags, pc);
}

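/* Write a function return event into the ring buffer. */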
void __trace_graph_return(struct trace_array *tr,
				struct ftrace_graph_ret *trace,
				unsigned long flags,
				int pc)
{
	struct trace_event_call *call = &event_funcgraph_exit;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
	struct ftrace_graph_ret_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
					  sizeof(*entry), flags, pc);
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->ret				= *trace;
	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}

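/* Return handler registered with register_ftrace_graph(). */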
void trace_graph_return(struct ftrace_graph_ret *trace)
{
	struct trace_array *tr = graph_array;
	struct trace_array_cpu *data;
	unsigned long flags;
	long disabled;
	int cpu;
	int pc;

	ftrace_graph_addr_finish(trace);

	local_irq_save(flags);
	cpu = raw_smp_processor_id();
	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1)) {
		pc = preempt_count();
		__trace_graph_return(tr, trace, flags, pc);
	}
	atomic_dec(&data->disabled);
	local_irq_restore(flags);
}

void set_graph_array(struct trace_array *tr)
{
	graph_array = tr;

	/* Make graph_array visible before we start tracing */

	smp_mb();
}

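/*
 * Return handler used when tracing_thresh is set: only record returns
 * whose duration is at least tracing_thresh.
 */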
static void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
{
	ftrace_graph_addr_finish(trace);

	if (tracing_thresh &&
	    (trace->rettime - trace->calltime < tracing_thresh))
		return;
	else
		trace_graph_return(trace);
}

static int graph_trace_init(struct trace_array *tr)
{
	int ret;

	set_graph_array(tr);
	if (tracing_thresh)
		ret = register_ftrace_graph(&trace_graph_thresh_return,
					    &trace_graph_entry);
	else
		ret = register_ftrace_graph(&trace_graph_return,
					    &trace_graph_entry);
	if (ret)
		return ret;
	tracing_start_cmdline_record();

	return 0;
}

static void graph_trace_reset(struct trace_array *tr)
{
	tracing_stop_cmdline_record();
	unregister_ftrace_graph();
}

static int graph_trace_update_thresh(struct trace_array *tr)
{
	graph_trace_reset(tr);
	return graph_trace_init(tr);
}

static int max_bytes_for_cpu;

static void print_graph_cpu(struct trace_seq *s, int cpu)
{
	/*
	 * Start with a space character - to make it stand out
	 * to the right a bit when trace output is pasted into
	 * email:
	 */
	trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu);
}

#define TRACE_GRAPH_PROCINFO_LENGTH	14

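/*
 * Print "comm-pid", centered within a column of
 * TRACE_GRAPH_PROCINFO_LENGTH characters.
 */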
static void print_graph_proc(struct trace_seq *s, pid_t pid)
{
	char comm[TASK_COMM_LEN];
	/* sign + log10(MAX_INT) + '\0' */
	char pid_str[11];
	int spaces = 0;
	int len;
	int i;

	trace_find_cmdline(pid, comm);
	comm[7] = '\0';
	sprintf(pid_str, "%d", pid);

	/* 1 stands for the "-" character */
	len = strlen(comm) + strlen(pid_str) + 1;

	if (len < TRACE_GRAPH_PROCINFO_LENGTH)
		spaces = TRACE_GRAPH_PROCINFO_LENGTH - len;

	/* First spaces to align center */
	for (i = 0; i < spaces / 2; i++)
		trace_seq_putc(s, ' ');

	trace_seq_printf(s, "%s-%s", comm, pid_str);

	/* Last spaces to align center */
	for (i = 0; i < spaces - (spaces / 2); i++)
		trace_seq_putc(s, ' ');
}


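/* Print the latency format fields for this entry (see trace_print_lat_fmt()). */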
static void print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
{
	trace_seq_putc(s, ' ');
	trace_print_lat_fmt(s, entry);
}

/* If the pid changed since the last trace, output this event */
static void
verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
{
	pid_t prev_pid;
	pid_t *last_pid;

	if (!data)
		return;

	last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);

	if (*last_pid == pid)
		return;

	prev_pid = *last_pid;
	*last_pid = pid;

	if (prev_pid == -1)
		return;
/*
 * Context-switch trace line:

 ------------------------------------------
 | 1)  migration/0--1  =>  sshd-1755
 ------------------------------------------

 */
	trace_seq_puts(s, " ------------------------------------------\n");
	print_graph_cpu(s, cpu);
	print_graph_proc(s, prev_pid);
	trace_seq_puts(s, " => ");
	print_graph_proc(s, pid);
	trace_seq_puts(s, "\n ------------------------------------------\n\n");
}

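/*
 * Peek at the next event in the ring buffer: if the current entry is
 * directly followed by its matching return event, this is a leaf call
 * and the return entry is handed back so both can be printed on one line.
 */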
static struct ftrace_graph_ret_entry *
get_return_for_leaf(struct trace_iterator *iter,
		struct ftrace_graph_ent_entry *curr)
{
	struct fgraph_data *data = iter->private;
	struct ring_buffer_iter *ring_iter = NULL;
	struct ring_buffer_event *event;
	struct ftrace_graph_ret_entry *next;

	/*
	 * If the previous output failed to write to the seq buffer,
	 * then we just reuse the data from before.
	 */
	if (data && data->failed) {
		curr = &data->ent;
		next = &data->ret;
	} else {

		ring_iter = trace_buffer_iter(iter, iter->cpu);

		/* First peek to compare current entry and the next one */
		if (ring_iter)
			event = ring_buffer_iter_peek(ring_iter, NULL);
		else {
			/*
			 * We need to consume the current entry to see
			 * the next one.
			 */
			ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu,
					    NULL, NULL);
			event = ring_buffer_peek(iter->trace_buffer->buffer, iter->cpu,
						 NULL, NULL);
		}

		if (!event)
			return NULL;

		next = ring_buffer_event_data(event);

		if (data) {
			/*
			 * Save current and next entries for later reference
			 * if the output fails.
			 */
			data->ent = *curr;
			/*
			 * If the next event is not a return type, then
			 * we only care about what type it is. Otherwise we can
			 * safely copy the entire event.
			 */
			if (next->ent.type == TRACE_GRAPH_RET)
				data->ret = *next;
			else
				data->ret.ent.type = next->ent.type;
		}
	}

	if (next->ent.type != TRACE_GRAPH_RET)
		return NULL;

	if (curr->ent.pid != next->ent.pid ||
			curr->graph_ent.func != next->ret.func)
		return NULL;

	/* this is a leaf, now advance the iterator */
	if (ring_iter)
		ring_buffer_read(ring_iter, NULL);

	return next;
}

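/* Print the absolute timestamp as seconds.microseconds. */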
static void print_graph_abs_time(u64 t, struct trace_seq *s)
{
	unsigned long usecs_rem;

	usecs_rem = do_div(t, NSEC_PER_SEC);
	usecs_rem /= 1000;

	trace_seq_printf(s, "%5lu.%06lu |  ",
			 (unsigned long)t, usecs_rem);
}

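/*
 * Print an interrupt entry/exit marker line ("==========>" or
 * "<==========") when addr lies within the irqentry text section.
 */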
static void
print_graph_irq(struct trace_iterator *iter, unsigned long addr,
		enum trace_type type, int cpu, pid_t pid, u32 flags)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;

	if (addr < (unsigned long)__irqentry_text_start ||
		addr >= (unsigned long)__irqentry_text_end)
		return;

	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
		/* Absolute time */
		if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
			print_graph_abs_time(iter->ts, s);

		/* Cpu */
		if (flags & TRACE_GRAPH_PRINT_CPU)
			print_graph_cpu(s, cpu);

		/* Proc */
		if (flags & TRACE_GRAPH_PRINT_PROC) {
			print_graph_proc(s, pid);
			trace_seq_puts(s, " | ");
		}

		/* Latency format */
		if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
			print_graph_lat_fmt(s, ent);
	}

	/* No overhead */
	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_START);

	if (type == TRACE_GRAPH_ENT)
		trace_seq_puts(s, "==========>");
	else
		trace_seq_puts(s, "<==========");

	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_END);
	trace_seq_putc(s, '\n');
}

void
trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
{
	unsigned long nsecs_rem = do_div(duration, 1000);
	/* log10(ULONG_MAX) + '\0' */
	char usecs_str[21];
	char nsecs_str[5];
	int len;
	int i;

	sprintf(usecs_str, "%lu", (unsigned long) duration);

	/* Print msecs */
	trace_seq_printf(s, "%s", usecs_str);

	len = strlen(usecs_str);

	/* Print nsecs (we don't want to exceed 7 numbers) */
	if (len < 7) {
		size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);

		snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
		trace_seq_printf(s, ".%s", nsecs_str);
		len += strlen(nsecs_str) + 1;
	}

	trace_seq_puts(s, " us ");

	/* Print remaining spaces to fit the row's width */
	for (i = len; i < 8; i++)
		trace_seq_putc(s, ' ');
}

static void
print_graph_duration(struct trace_array *tr, unsigned long long duration,
		     struct trace_seq *s, u32 flags)
{
	if (!(flags & TRACE_GRAPH_PRINT_DURATION) ||
	    !(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
		return;

	/* No real data, just filling the column with spaces */
	switch (flags & TRACE_GRAPH_PRINT_FILL_MASK) {
	case FLAGS_FILL_FULL:
		trace_seq_puts(s, "              |  ");
		return;
	case FLAGS_FILL_START:
		trace_seq_puts(s, "  ");
		return;
	case FLAGS_FILL_END:
		trace_seq_puts(s, " |");
		return;
	}

	/* Signal an overhead of time execution to the output */
	if (flags & TRACE_GRAPH_PRINT_OVERHEAD)
		trace_seq_printf(s, "%c ", trace_find_mark(duration));
	else
		trace_seq_puts(s, "  ");

	trace_print_graph_duration(duration, s);
	trace_seq_puts(s, "|  ");
}

/* Case of a leaf function on its call entry */
static enum print_line_t
print_graph_entry_leaf(struct trace_iterator *iter,
		struct ftrace_graph_ent_entry *entry,
		struct ftrace_graph_ret_entry *ret_entry,
		struct trace_seq *s, u32 flags)
{
	struct fgraph_data *data = iter->private;
	struct trace_array *tr = iter->tr;
	struct ftrace_graph_ret *graph_ret;
	struct ftrace_graph_ent *call;
	unsigned long long duration;
	int cpu = iter->cpu;
	int i;

	graph_ret = &ret_entry->ret;
	call = &entry->graph_ent;
	duration = graph_ret->rettime - graph_ret->calltime;

	if (data) {
		struct fgraph_cpu_data *cpu_data;

		cpu_data = per_cpu_ptr(data->cpu_data, cpu);

		/* If a graph tracer ignored set_graph_notrace */
		if (call->depth < -1)
			call->depth += FTRACE_NOTRACE_DEPTH;

		/*
		 * Comments display at + 1 to depth. Since
		 * this is a leaf function, keep the comments
		 * equal to this depth.
		 */
		cpu_data->depth = call->depth - 1;

		/* No need to keep this function around for this depth */
		if (call->depth < FTRACE_RETFUNC_DEPTH &&
		    !WARN_ON_ONCE(call->depth < 0))
			cpu_data->enter_funcs[call->depth] = 0;
	}

	/* Overhead and duration */
	print_graph_duration(tr, duration, s, flags);

	/* Function */
	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
		trace_seq_putc(s, ' ');

	trace_seq_printf(s, "%ps();\n", (void *)call->func);

	print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
			cpu, iter->ent->pid, flags);

	return trace_handle_return(s);
}

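/* Case of a nested (non-leaf) function on its call entry */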
static enum print_line_t
print_graph_entry_nested(struct trace_iterator *iter,
			 struct ftrace_graph_ent_entry *entry,
			 struct trace_seq *s, int cpu, u32 flags)
{
	struct ftrace_graph_ent *call = &entry->graph_ent;
	struct fgraph_data *data = iter->private;
	struct trace_array *tr = iter->tr;
	int i;

	if (data) {
		struct fgraph_cpu_data *cpu_data;
		int cpu = iter->cpu;

		/* If a graph tracer ignored set_graph_notrace */
		if (call->depth < -1)
			call->depth += FTRACE_NOTRACE_DEPTH;

		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
		cpu_data->depth = call->depth;

		/* Save this function pointer to see if the exit matches */
		if (call->depth < FTRACE_RETFUNC_DEPTH &&
		    !WARN_ON_ONCE(call->depth < 0))
			cpu_data->enter_funcs[call->depth] = call->func;
	}

	/* No time */
	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_FULL);

	/* Function */
	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
		trace_seq_putc(s, ' ');

	trace_seq_printf(s, "%ps() {\n", (void *)call->func);

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	/*
	 * we already consumed the current entry to check the next one
	 * and see if this is a leaf.
	 */
	return TRACE_TYPE_NO_CONSUME;
}

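/*
 * Print the common start of a trace line: pid change, interrupt markers
 * and, depending on the flags, absolute time, CPU, proc and latency info.
 */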
static void
print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
		     int type, unsigned long addr, u32 flags)
{
	struct fgraph_data *data = iter->private;
	struct trace_entry *ent = iter->ent;
	struct trace_array *tr = iter->tr;
	int cpu = iter->cpu;

	/* Pid */
	verif_pid(s, ent->pid, cpu, data);

	if (type)
		/* Interrupt */
		print_graph_irq(iter, addr, type, cpu, ent->pid, flags);

	if (!(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
		return;

	/* Absolute time */
	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
		print_graph_abs_time(iter->ts, s);

	/* Cpu */
	if (flags & TRACE_GRAPH_PRINT_CPU)
		print_graph_cpu(s, cpu);

	/* Proc */