Linux webserver 6.8.0-49-generic #49~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Wed Nov 6 17:42:15 UTC 2 x86_64
Apache/2.4.52 (Ubuntu)
Server IP : 192.168.1.1 & Your IP : 3.12.164.78
Domains :
Cant Read [ /etc/named.conf ]
User : www-data
Terminal
Auto Root
Create File
Create Folder
Localroot Suggester
Backdoor Destroyer
Readme
/
snap /
lxd /
31333 /
share /
openvswitch /
scripts /
usdt /
Delete
Unzip
Name
Size
Permission
Date
Action
bridge_loop.bt
4.91
KB
-rwxr-xr-x
2024-11-20 14:02
dpif_nl_exec_monitor.py
21.67
KB
-rwxr-xr-x
2024-11-20 14:02
kernel_delay.py
43
KB
-rwxr-xr-x
2024-11-20 14:02
kernel_delay.rst
29.64
KB
-rwxr-xr-x
2024-11-20 14:02
reval_monitor.py
29.42
KB
-rwxr-xr-x
2024-11-20 14:02
upcall_cost.py
64.38
KB
-rwxr-xr-x
2024-11-20 14:02
upcall_monitor.py
17.19
KB
-rwxr-xr-x
2024-11-20 14:02
Save
Rename
#!/usr/bin/env python3 # # Copyright (c) 2022,2023 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # Script information: # ------------------- # This script allows a developer to quickly identify if the issue at hand # might be related to the kernel running out of resources or if it really is # an Open vSwitch issue. # # For documentation see the kernel_delay.rst file. # # # Dependencies: # ------------- # You need to install the BCC package for your specific platform or build it # yourself using the following instructions: # https://raw.githubusercontent.com/iovisor/bcc/master/INSTALL.md # # Python needs the following additional packages installed: # - pytz # - psutil # # You can either install your distribution specific package or use pip: # pip install pytz psutil # import argparse import datetime import os import pytz import psutil import re import sys import time import ctypes as ct try: from bcc import BPF, USDT, USDTException from bcc.syscall import syscalls, syscall_name except ModuleNotFoundError: print("ERROR: Can't find the BPF Compiler Collection (BCC) tools!") sys.exit(os.EX_OSFILE) from enum import IntEnum # # Actual eBPF source code # EBPF_SOURCE = """ #include <linux/irq.h> #include <linux/sched.h> #define MONITOR_PID <MONITOR_PID> enum { <EVENT_ENUM> }; struct event_t { u64 ts; u32 tid; u32 id; int user_stack_id; int kernel_stack_id; u32 syscall; u64 entry_ts; }; BPF_RINGBUF_OUTPUT(events, <BUFFER_PAGE_CNT>); BPF_STACK_TRACE(stack_traces, <STACK_TRACE_SIZE>); BPF_TABLE("percpu_array", uint32_t, uint64_t, dropcnt, 1); BPF_TABLE("percpu_array", uint32_t, uint64_t, trigger_miss, 1); BPF_ARRAY(capture_on, u64, 1); static inline bool capture_enabled(u64 pid_tgid) { int key = 0; u64 *ret; if ((pid_tgid >> 32) != MONITOR_PID) return false; ret = capture_on.lookup(&key); return ret && *ret == 1; } static inline bool capture_enabled__() { int key = 0; u64 *ret; ret = capture_on.lookup(&key); return ret && *ret == 1; } static struct event_t *get_event(uint32_t id) { struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t)); if (!event) { dropcnt.increment(0); return NULL; } event->id = id; event->ts = bpf_ktime_get_ns(); event->tid = bpf_get_current_pid_tgid(); return event; } static int start_trigger() { int key = 0; u64 *val = capture_on.lookup(&key); /* If the value is -1 we can't start as we are still processing the * results in userspace. */ if (!val || *val != 0) { trigger_miss.increment(0); return 0; } struct event_t *event = get_event(EVENT_START_TRIGGER); if (event) { events.ringbuf_submit(event, 0); *val = 1; } else { trigger_miss.increment(0); } return 0; } static int stop_trigger() { int key = 0; u64 *val = capture_on.lookup(&key); if (!val || *val != 1) return 0; struct event_t *event = get_event(EVENT_STOP_TRIGGER); if (event) events.ringbuf_submit(event, 0); if (val) *val = -1; return 0; } <START_TRIGGER> <STOP_TRIGGER> /* * For the syscall monitor the following probes get installed. */ struct syscall_data_t { u64 count; u64 total_ns; u64 worst_ns; }; struct syscall_data_key_t { u32 pid; u32 tid; u32 syscall; }; BPF_HASH(syscall_start, u64, u64); BPF_HASH(syscall_data, struct syscall_data_key_t, struct syscall_data_t); TRACEPOINT_PROBE(raw_syscalls, sys_enter) { u64 pid_tgid = bpf_get_current_pid_tgid(); if (!capture_enabled(pid_tgid)) return 0; u64 t = bpf_ktime_get_ns(); syscall_start.update(&pid_tgid, &t); return 0; } TRACEPOINT_PROBE(raw_syscalls, sys_exit) { struct syscall_data_t *val, zero = {}; struct syscall_data_key_t key; u64 pid_tgid = bpf_get_current_pid_tgid(); if (!capture_enabled(pid_tgid)) return 0; key.pid = pid_tgid >> 32; key.tid = (u32)pid_tgid; key.syscall = args->id; u64 *start_ns = syscall_start.lookup(&pid_tgid); if (!start_ns) return 0; val = syscall_data.lookup_or_try_init(&key, &zero); if (val) { u64 delta = bpf_ktime_get_ns() - *start_ns; val->count++; val->total_ns += delta; if (val->worst_ns == 0 || delta > val->worst_ns) val->worst_ns = delta; if (<SYSCALL_TRACE_EVENTS>) { struct event_t *event = get_event(EVENT_SYSCALL); if (event) { event->syscall = args->id; event->entry_ts = *start_ns; if (<STACK_TRACE_ENABLED>) { event->user_stack_id = stack_traces.get_stackid( args, BPF_F_USER_STACK); event->kernel_stack_id = stack_traces.get_stackid( args, 0); } events.ringbuf_submit(event, 0); } } } return 0; } /* * For measuring the thread run time, we need the following. */ struct run_time_data_t { u64 count; u64 total_ns; u64 max_ns; u64 min_ns; }; struct pid_tid_key_t { u32 pid; u32 tid; }; BPF_HASH(run_start, u64, u64); BPF_HASH(run_data, struct pid_tid_key_t, struct run_time_data_t); static inline void thread_start_run(u64 pid_tgid, u64 ktime) { run_start.update(&pid_tgid, &ktime); } static inline void thread_stop_run(u32 pid, u32 tgid, u64 ktime) { u64 pid_tgid = (u64) tgid << 32 | pid; u64 *start_ns = run_start.lookup(&pid_tgid); if (!start_ns || *start_ns == 0) return; struct run_time_data_t *val, zero = {}; struct pid_tid_key_t key = { .pid = tgid, .tid = pid }; val = run_data.lookup_or_try_init(&key, &zero); if (val) { u64 delta = ktime - *start_ns; val->count++; val->total_ns += delta; if (val->max_ns == 0 || delta > val->max_ns) val->max_ns = delta; if (val->min_ns == 0 || delta < val->min_ns) val->min_ns = delta; } *start_ns = 0; } /* * For measuring the thread-ready delay, we need the following. */ struct ready_data_t { u64 count; u64 total_ns; u64 worst_ns; }; BPF_HASH(ready_start, u64, u64); BPF_HASH(ready_data, struct pid_tid_key_t, struct ready_data_t); static inline int sched_wakeup__(u32 pid, u32 tgid) { u64 pid_tgid = (u64) tgid << 32 | pid; if (!capture_enabled(pid_tgid)) return 0; u64 t = bpf_ktime_get_ns(); ready_start.update(&pid_tgid, &t); return 0; } RAW_TRACEPOINT_PROBE(sched_wakeup) { struct task_struct *t = (struct task_struct *)ctx->args[0]; return sched_wakeup__(t->pid, t->tgid); } RAW_TRACEPOINT_PROBE(sched_wakeup_new) { struct task_struct *t = (struct task_struct *)ctx->args[0]; return sched_wakeup__(t->pid, t->tgid); } RAW_TRACEPOINT_PROBE(sched_switch) { struct task_struct *prev = (struct task_struct *)ctx->args[1]; struct task_struct *next= (struct task_struct *)ctx->args[2]; u64 ktime = 0; if (!capture_enabled__()) return 0; if (prev-><STATE_FIELD> == TASK_RUNNING && prev->tgid == MONITOR_PID) sched_wakeup__(prev->pid, prev->tgid); if (prev->tgid == MONITOR_PID) { ktime = bpf_ktime_get_ns(); thread_stop_run(prev->pid, prev->tgid, ktime); } u64 pid_tgid = (u64)next->tgid << 32 | next->pid; if (next->tgid != MONITOR_PID) return 0; if (ktime == 0) ktime = bpf_ktime_get_ns(); u64 *start_ns = ready_start.lookup(&pid_tgid); if (start_ns && *start_ns != 0) { struct ready_data_t *val, zero = {}; struct pid_tid_key_t key = { .pid = next->tgid, .tid = next->pid }; val = ready_data.lookup_or_try_init(&key, &zero); if (val) { u64 delta = ktime - *start_ns; val->count++; val->total_ns += delta; if (val->worst_ns == 0 || delta > val->worst_ns) val->worst_ns = delta; } *start_ns = 0; } thread_start_run(pid_tgid, ktime); return 0; } /* * For measuring the hard irq time, we need the following. */ struct hardirq_start_data_t { u64 start_ns; char irq_name[32]; }; struct hardirq_data_t { u64 count; u64 total_ns; u64 worst_ns; }; struct hardirq_data_key_t { u32 pid; u32 tid; char irq_name[32]; }; BPF_HASH(hardirq_start, u64, struct hardirq_start_data_t); BPF_HASH(hardirq_data, struct hardirq_data_key_t, struct hardirq_data_t); TRACEPOINT_PROBE(irq, irq_handler_entry) { u64 pid_tgid = bpf_get_current_pid_tgid(); if (!capture_enabled(pid_tgid)) return 0; struct hardirq_start_data_t data = {}; data.start_ns = bpf_ktime_get_ns(); TP_DATA_LOC_READ_STR(&data.irq_name, name, sizeof(data.irq_name)); hardirq_start.update(&pid_tgid, &data); return 0; } TRACEPOINT_PROBE(irq, irq_handler_exit) { u64 pid_tgid = bpf_get_current_pid_tgid(); if (!capture_enabled(pid_tgid)) return 0; struct hardirq_start_data_t *data; data = hardirq_start.lookup(&pid_tgid); if (!data || data->start_ns == 0) return 0; if (args->ret != IRQ_NONE) { struct hardirq_data_t *val, zero = {}; struct hardirq_data_key_t key = { .pid = pid_tgid >> 32, .tid = (u32)pid_tgid }; bpf_probe_read_kernel(&key.irq_name, sizeof(key.irq_name), data->irq_name); val = hardirq_data.lookup_or_try_init(&key, &zero); if (val) { u64 delta = bpf_ktime_get_ns() - data->start_ns; val->count++; val->total_ns += delta; if (val->worst_ns == 0 || delta > val->worst_ns) val->worst_ns = delta; } } data->start_ns = 0; return 0; } /* * For measuring the soft irq time, we need the following. */ struct softirq_start_data_t { u64 start_ns; u32 vec_nr; }; struct softirq_data_t { u64 count; u64 total_ns; u64 worst_ns; }; struct softirq_data_key_t { u32 pid; u32 tid; u32 vec_nr; }; BPF_HASH(softirq_start, u64, struct softirq_start_data_t); BPF_HASH(softirq_data, struct softirq_data_key_t, struct softirq_data_t); TRACEPOINT_PROBE(irq, softirq_entry) { u64 pid_tgid = bpf_get_current_pid_tgid(); if (!capture_enabled(pid_tgid)) return 0; struct softirq_start_data_t data = {}; data.start_ns = bpf_ktime_get_ns(); data.vec_nr = args->vec; softirq_start.update(&pid_tgid, &data); return 0; } TRACEPOINT_PROBE(irq, softirq_exit) { u64 pid_tgid = bpf_get_current_pid_tgid(); if (!capture_enabled(pid_tgid)) return 0; struct softirq_start_data_t *data; data = softirq_start.lookup(&pid_tgid); if (!data || data->start_ns == 0) return 0; struct softirq_data_t *val, zero = {}; struct softirq_data_key_t key = { .pid = pid_tgid >> 32, .tid = (u32)pid_tgid, .vec_nr = data->vec_nr}; val = softirq_data.lookup_or_try_init(&key, &zero); if (val) { u64 delta = bpf_ktime_get_ns() - data->start_ns; val->count++; val->total_ns += delta; if (val->worst_ns == 0 || delta > val->worst_ns) val->worst_ns = delta; } data->start_ns = 0; return 0; } """ # # time_ns() # try: from time import time_ns except ImportError: # For compatibility with Python <= v3.6. def time_ns(): now = datetime.datetime.now() return int(now.timestamp() * 1e9) # # Probe class to use for the start/stop triggers # class Probe(object): ''' The goal for this object is to support as many as possible probe/events as supported by BCC. See https://github.com/iovisor/bcc/blob/master/docs/reference_guide.md#events--arguments ''' def __init__(self, probe, pid=None): self.pid = pid self.text_probe = probe self._parse_text_probe() def __str__(self): if self.probe_type == "usdt": return "[{}]; {}:{}:{}".format(self.text_probe, self.probe_type, self.usdt_provider, self.usdt_probe) elif self.probe_type == "trace": return "[{}]; {}:{}:{}".format(self.text_probe, self.probe_type, self.trace_system, self.trace_event) elif self.probe_type == "kprobe" or self.probe_type == "kretprobe": return "[{}]; {}:{}".format(self.text_probe, self.probe_type, self.kprobe_function) elif self.probe_type == "uprobe" or self.probe_type == "uretprobe": return "[{}]; {}:{}".format(self.text_probe, self.probe_type, self.uprobe_function) else: return "[{}] <{}:unknown probe>".format(self.text_probe, self.probe_type) def _raise(self, error): raise ValueError("[{}]; {}".format(self.text_probe, error)) def _verify_kprobe_probe(self): # Nothing to verify for now, just return. return def _verify_trace_probe(self): # Nothing to verify for now, just return. return def _verify_uprobe_probe(self): # Nothing to verify for now, just return. return def _verify_usdt_probe(self): if not self.pid: self._raise("USDT probes need a valid PID.") usdt = USDT(pid=self.pid) for probe in usdt.enumerate_probes(): if probe.provider.decode("utf-8") == self.usdt_provider and \ probe.name.decode("utf-8") == self.usdt_probe: return self._raise("Can't find UDST probe '{}:{}'".format(self.usdt_provider, self.usdt_probe)) def _parse_text_probe(self): ''' The text probe format is defined as follows: <probe_type>:<probe_specific> Types: USDT: u|usdt:<provider>:<probe> TRACE: t|trace:<system>:<event> KPROBE: k|kprobe:<kernel_function> KRETPROBE: kr|kretprobe:<kernel_function> UPROBE: up|uprobe:<function> URETPROBE: ur|uretprobe:<function> ''' args = self.text_probe.split(":") if len(args) <= 1: self._raise("Can't extract probe type.") if args[0] not in ["k", "kprobe", "kr", "kretprobe", "t", "trace", "u", "usdt", "up", "uprobe", "ur", "uretprobe"]: self._raise("Invalid probe type '{}'".format(args[0])) self.probe_type = "kprobe" if args[0] == "k" else args[0] self.probe_type = "kretprobe" if args[0] == "kr" else self.probe_type self.probe_type = "trace" if args[0] == "t" else self.probe_type self.probe_type = "usdt" if args[0] == "u" else self.probe_type self.probe_type = "uprobe" if args[0] == "up" else self.probe_type self.probe_type = "uretprobe" if args[0] == "ur" else self.probe_type if self.probe_type == "usdt": if len(args) != 3: self._raise("Invalid number of arguments for USDT") self.usdt_provider = args[1] self.usdt_probe = args[2] self._verify_usdt_probe() elif self.probe_type == "trace": if len(args) != 3: self._raise("Invalid number of arguments for TRACE") self.trace_system = args[1] self.trace_event = args[2] self._verify_trace_probe() elif self.probe_type == "kprobe" or self.probe_type == "kretprobe": if len(args) != 2: self._raise("Invalid number of arguments for K(RET)PROBE") self.kprobe_function = args[1] self._verify_kprobe_probe() elif self.probe_type == "uprobe" or self.probe_type == "uretprobe": if len(args) != 2: self._raise("Invalid number of arguments for U(RET)PROBE") self.uprobe_function = args[1] self._verify_uprobe_probe() def _get_kprobe_c_code(self, function_name, function_content): # # The kprobe__* do not require a function name, so it's # ignored in the code generation. # return """ int {}__{}(struct pt_regs *ctx) {{ {} }} """.format(self.probe_type, self.kprobe_function, function_content) def _get_trace_c_code(self, function_name, function_content): # # The TRACEPOINT_PROBE() do not require a function name, so it's # ignored in the code generation. # return """ TRACEPOINT_PROBE({},{}) {{ {} }} """.format(self.trace_system, self.trace_event, function_content) def _get_uprobe_c_code(self, function_name, function_content): return """ int {}(struct pt_regs *ctx) {{ {} }} """.format(function_name, function_content) def _get_usdt_c_code(self, function_name, function_content): return """ int {}(struct pt_regs *ctx) {{ {} }} """.format(function_name, function_content) def get_c_code(self, function_name, function_content): if self.probe_type == "kprobe" or self.probe_type == "kretprobe": return self._get_kprobe_c_code(function_name, function_content) elif self.probe_type == "trace": return self._get_trace_c_code(function_name, function_content) elif self.probe_type == "uprobe" or self.probe_type == "uretprobe": return self._get_uprobe_c_code(function_name, function_content) elif self.probe_type == "usdt": return self._get_usdt_c_code(function_name, function_content) return "" def probe_name(self): if self.probe_type == "kprobe" or self.probe_type == "kretprobe": return "{}".format(self.kprobe_function) elif self.probe_type == "trace": return "{}:{}".format(self.trace_system, self.trace_event) elif self.probe_type == "uprobe" or self.probe_type == "uretprobe": return "{}".format(self.uprobe_function) elif self.probe_type == "usdt": return "{}:{}".format(self.usdt_provider, self.usdt_probe) return "" # # event_to_dict() # def event_to_dict(event): return dict([(field, getattr(event, field)) for (field, _) in event._fields_ if isinstance(getattr(event, field), (int, bytes))]) # # Event enum # Event = IntEnum("Event", ["SYSCALL", "START_TRIGGER", "STOP_TRIGGER"], start=0) # # process_event() # def process_event(ctx, data, size): global start_trigger_ts global stop_trigger_ts event = bpf["events"].event(data) if event.id == Event.SYSCALL: syscall_events.append({"tid": event.tid, "ts_entry": event.entry_ts, "ts_exit": event.ts, "syscall": event.syscall, "user_stack_id": event.user_stack_id, "kernel_stack_id": event.kernel_stack_id}) elif event.id == Event.START_TRIGGER: # # This event would have started the trigger already, so all we need to # do is record the start timestamp. # start_trigger_ts = event.ts elif event.id == Event.STOP_TRIGGER: # # This event would have stopped the trigger already, so all we need to # do is record the start timestamp. stop_trigger_ts = event.ts # # next_power_of_two() # def next_power_of_two(val): np = 1 while np < val: np *= 2 return np # # unsigned_int() # def unsigned_int(value): try: value = int(value) except ValueError: raise argparse.ArgumentTypeError("must be an integer") if value < 0: raise argparse.ArgumentTypeError("must be positive") return value # # unsigned_nonzero_int() # def unsigned_nonzero_int(value): value = unsigned_int(value) if value == 0: raise argparse.ArgumentTypeError("must be nonzero") return value # # get_thread_name() # def get_thread_name(pid, tid): try: with open(f"/proc/{pid}/task/{tid}/comm", encoding="utf8") as f: return f.readline().strip("\n") except FileNotFoundError: pass return f"<unknown:{pid}/{tid}>" # # get_vec_nr_name() # def get_vec_nr_name(vec_nr): known_vec_nr = ["hi", "timer", "net_tx", "net_rx", "block", "irq_poll", "tasklet", "sched", "hrtimer", "rcu"] if vec_nr < 0 or vec_nr > len(known_vec_nr): return f"<unknown:{vec_nr}>" return known_vec_nr[vec_nr] # # start/stop/reset capture # def start_capture(): bpf["capture_on"][ct.c_int(0)] = ct.c_int(1) def stop_capture(force=False): if force: bpf["capture_on"][ct.c_int(0)] = ct.c_int(0xffff) else: bpf["capture_on"][ct.c_int(0)] = ct.c_int(0) def capture_running(): return bpf["capture_on"][ct.c_int(0)].value == 1 def reset_capture(): bpf["syscall_start"].clear() bpf["syscall_data"].clear() bpf["run_start"].clear() bpf["run_data"].clear() bpf["ready_start"].clear() bpf["ready_data"].clear() bpf["hardirq_start"].clear() bpf["hardirq_data"].clear() bpf["softirq_start"].clear() bpf["softirq_data"].clear() bpf["stack_traces"].clear() # # Display timestamp # def print_timestamp(msg): ltz = datetime.datetime.now() utc = ltz.astimezone(pytz.utc) time_string = "{} @{} ({} UTC)".format( msg, ltz.isoformat(), utc.strftime("%H:%M:%S")) print(time_string) # # process_results() # def process_results(syscall_events=None, trigger_delta=None): if trigger_delta: print_timestamp("# Triggered sample dump, stop-start delta {:,} ns". format(trigger_delta)) else: print_timestamp("# Sample dump") # # First get a list of all threads we need to report on. # threads_syscall = {k.tid for k, _ in bpf["syscall_data"].items() if k.syscall != 0xffffffff} threads_run = {k.tid for k, _ in bpf["run_data"].items() if k.pid != 0xffffffff} threads_ready = {k.tid for k, _ in bpf["ready_data"].items() if k.pid != 0xffffffff} threads_hardirq = {k.tid for k, _ in bpf["hardirq_data"].items() if k.pid != 0xffffffff} threads_softirq = {k.tid for k, _ in bpf["softirq_data"].items() if k.pid != 0xffffffff} threads = sorted(threads_syscall | threads_run | threads_ready | threads_hardirq | threads_softirq, key=lambda x: get_thread_name(options.pid, x)) # # Print header... # print("{:10} {:16} {}".format("TID", "THREAD", "<RESOURCE SPECIFIC>")) print("{:10} {:16} {}".format("-" * 10, "-" * 16, "-" * 76)) indent = 28 * " " # # Print all events/statistics per threads. # poll_id = [k for k, v in syscalls.items() if v == b"poll"][0] for thread in threads: if thread != threads[0]: print("") # # SYSCALL_STATISTICS # print("{:10} {:16} {}\n{}{:20} {:>6} {:>10} {:>16} {:>16}".format( thread, get_thread_name(options.pid, thread), "[SYSCALL STATISTICS]", indent, "NAME", "NUMBER", "COUNT", "TOTAL ns", "MAX ns")) total_count = 0 total_ns = 0 for k, v in sorted(filter(lambda t: t[0].tid == thread, bpf["syscall_data"].items()), key=lambda kv: -kv[1].total_ns): print("{}{:20.20} {:6} {:10} {:16,} {:16,}".format( indent, syscall_name(k.syscall).decode("utf-8"), k.syscall, v.count, v.total_ns, v.worst_ns)) if k.syscall != poll_id: total_count += v.count total_ns += v.total_ns if total_count > 0: print("{}{:20.20} {:6} {:10} {:16,}".format( indent, "TOTAL( - poll):", "", total_count, total_ns)) # # THREAD RUN STATISTICS # print("\n{:10} {:16} {}\n{}{:10} {:>16} {:>16} {:>16}".format( "", "", "[THREAD RUN STATISTICS]", indent, "SCHED_CNT", "TOTAL ns", "MIN ns", "MAX ns")) for k, v in filter(lambda t: t[0].tid == thread, bpf["run_data"].items()): print("{}{:10} {:16,} {:16,} {:16,}".format( indent, v.count, v.total_ns, v.min_ns, v.max_ns)) # # THREAD READY STATISTICS # print("\n{:10} {:16} {}\n{}{:10} {:>16} {:>16}".format( "", "", "[THREAD READY STATISTICS]", indent, "SCHED_CNT", "TOTAL ns", "MAX ns")) for k, v in filter(lambda t: t[0].tid == thread, bpf["ready_data"].items()): print("{}{:10} {:16,} {:16,}".format( indent, v.count, v.total_ns, v.worst_ns)) # # HARD IRQ STATISTICS # total_ns = 0 total_count = 0 header_printed = False for k, v in sorted(filter(lambda t: t[0].tid == thread, bpf["hardirq_data"].items()), key=lambda kv: -kv[1].total_ns): if not header_printed: print("\n{:10} {:16} {}\n{}{:20} {:>10} {:>16} {:>16}". format("", "", "[HARD IRQ STATISTICS]", indent, "NAME", "COUNT", "TOTAL ns", "MAX ns")) header_printed = True print("{}{:20.20} {:10} {:16,} {:16,}".format( indent, k.irq_name.decode("utf-8"), v.count, v.total_ns, v.worst_ns)) total_count += v.count total_ns += v.total_ns if total_count > 0: print("{}{:20.20} {:10} {:16,}".format( indent, "TOTAL:", total_count, total_ns)) # # SOFT IRQ STATISTICS # total_ns = 0 total_count = 0 header_printed = False for k, v in sorted(filter(lambda t: t[0].tid == thread, bpf["softirq_data"].items()), key=lambda kv: -kv[1].total_ns): if not header_printed: print("\n{:10} {:16} {}\n" "{}{:20} {:>7} {:>10} {:>16} {:>16}". format("", "", "[SOFT IRQ STATISTICS]", indent, "NAME", "VECT_NR", "COUNT", "TOTAL ns", "MAX ns")) header_printed = True print("{}{:20.20} {:>7} {:10} {:16,} {:16,}".format( indent, get_vec_nr_name(k.vec_nr), k.vec_nr, v.count, v.total_ns, v.worst_ns)) total_count += v.count total_ns += v.total_ns if total_count > 0: print("{}{:20.20} {:7} {:10} {:16,}".format( indent, "TOTAL:", "", total_count, total_ns)) # # Print events # lost_stack_traces = 0 if syscall_events: stack_traces = bpf.get_table("stack_traces") print("\n\n# SYSCALL EVENTS:" "\n{}{:>19} {:>19} {:>10} {:16} {:>10} {}".format( 2 * " ", "ENTRY (ns)", "EXIT (ns)", "TID", "COMM", "DELTA (us)", "SYSCALL")) print("{}{:19} {:19} {:10} {:16} {:10} {}".format( 2 * " ", "-" * 19, "-" * 19, "-" * 10, "-" * 16, "-" * 10, "-" * 16)) for event in syscall_events: print("{}{:19} {:19} {:10} {:16} {:10,} {}".format( " " * 2, event["ts_entry"], event["ts_exit"], event["tid"], get_thread_name(options.pid, event["tid"]), int((event["ts_exit"] - event["ts_entry"]) / 1000), syscall_name(event["syscall"]).decode("utf-8"))) # # Not sure where to put this, but I'll add some info on stack # traces here... Userspace stack traces are very limited due to # the fact that bcc does not support dwarf backtraces. As OVS # gets compiled without frame pointers we will not see much. # If however, OVS does get built with frame pointers, we should not # use the BPF_STACK_TRACE_BUILDID as it does not seem to handle # the debug symbols correctly. Also, note that for kernel # traces you should not use BPF_STACK_TRACE_BUILDID, so two # buffers are needed. # # Some info on manual dwarf walk support: # https://github.com/iovisor/bcc/issues/3515 # https://github.com/iovisor/bcc/pull/4463 # if options.stack_trace_size == 0: continue if event["kernel_stack_id"] < 0 or event["user_stack_id"] < 0: lost_stack_traces += 1 kernel_stack = stack_traces.walk(event["kernel_stack_id"]) \ if event["kernel_stack_id"] >= 0 else [] user_stack = stack_traces.walk(event["user_stack_id"]) \ if event["user_stack_id"] >= 0 else [] for addr in kernel_stack: print("{}{}".format( " " * 10, bpf.ksym(addr, show_module=True, show_offset=True).decode("utf-8", "replace"))) for addr in user_stack: addr_str = bpf.sym(addr, options.pid, show_module=True, show_offset=True).decode("utf-8", "replace") if addr_str == "[unknown]": addr_str += " 0x{:x}".format(addr) print("{}{}".format(" " * 10, addr_str)) # # Print any footer messages. # if lost_stack_traces > 0: print("\n#WARNING: We where not able to display {} stack traces!\n" "# Consider increasing the stack trace size using\n" "# the '--stack-trace-size' option.\n" "# Note that this can also happen due to a stack id\n" "# collision.".format(lost_stack_traces)) # # main() # def main(): # # Don't like these globals, but ctx passing does not seem to work with the # existing open_ring_buffer() API :( # global bpf global options global syscall_events global start_trigger_ts global stop_trigger_ts start_trigger_ts = 0 stop_trigger_ts = 0 # # Argument parsing # parser = argparse.ArgumentParser() parser.add_argument("-D", "--debug", help="Enable eBPF debugging", type=int, const=0x3f, default=0, nargs="?") parser.add_argument("-p", "--pid", metavar="VSWITCHD_PID", help="ovs-vswitch's PID", type=unsigned_int, default=None) parser.add_argument("-s", "--syscall-events", metavar="DURATION_NS", help="Record syscall events that take longer than " "DURATION_NS. Omit the duration value to record all " "syscall events", type=unsigned_int, const=0, default=None, nargs="?") parser.add_argument("--buffer-page-count", help="Number of BPF ring buffer pages, default 1024", type=unsigned_int, default=1024, metavar="NUMBER") parser.add_argument("--sample-count", help="Number of sample runs, default 1", type=unsigned_nonzero_int, default=1, metavar="RUNS") parser.add_argument("--sample-interval", help="Delay between sample runs, default 0", type=float, default=0, metavar="SECONDS") parser.add_argument("--sample-time", help="Sample time, default 0.5 seconds", type=float, default=0.5, metavar="SECONDS") parser.add_argument("--skip-syscall-poll-events", help="Skip poll() syscalls with --syscall-events", action="store_true") parser.add_argument("--stack-trace-size", help="Number of unique stack traces that can be " "recorded, default 4096. 0 to disable", type=unsigned_int, default=4096) parser.add_argument("--start-trigger", metavar="TRIGGER", help="Start trigger, see documentation for details", type=str, default=None) parser.add_argument("--stop-trigger", metavar="TRIGGER", help="Stop trigger, see documentation for details", type=str, default=None) parser.add_argument("--trigger-delta", metavar="DURATION_NS", help="Only report event when the trigger duration > " "DURATION_NS, default 0 (all events)", type=unsigned_int, const=0, default=0, nargs="?") options = parser.parse_args() # # Find the PID of the ovs-vswitchd daemon if not specified. # if not options.pid: for proc in psutil.process_iter(): if "ovs-vswitchd" in proc.name(): if options.pid: print("ERROR: Multiple ovs-vswitchd daemons running, " "use the -p option!") sys.exit(os.EX_NOINPUT) options.pid = proc.pid # # Error checking on input parameters. # if not options.pid: print("ERROR: Failed to find ovs-vswitchd's PID!") sys.exit(os.EX_UNAVAILABLE) options.buffer_page_count = next_power_of_two(options.buffer_page_count) # # Make sure we are running as root, or else we can not attach the probes. # if os.geteuid() != 0: print("ERROR: We need to run as root to attached probes!") sys.exit(os.EX_NOPERM) # # Setup any of the start stop triggers # if options.start_trigger is not None: try: start_trigger = Probe(options.start_trigger, pid=options.pid) except ValueError as e: print(f"ERROR: Invalid start trigger {str(e)}") sys.exit(os.EX_CONFIG) else: start_trigger = None if options.stop_trigger is not None: try: stop_trigger = Probe(options.stop_trigger, pid=options.pid) except ValueError as e: print(f"ERROR: Invalid stop trigger {str(e)}") sys.exit(os.EX_CONFIG) else: stop_trigger = None # # Attach probe to running process. # source = EBPF_SOURCE.replace("<EVENT_ENUM>", "\n".join( [" EVENT_{} = {},".format( event.name, event.value) for event in Event])) source = source.replace("<BUFFER_PAGE_CNT>", str(options.buffer_page_count)) source = source.replace("<MONITOR_PID>", str(options.pid)) if BPF.kernel_struct_has_field(b"task_struct", b"state") == 1: source = source.replace("<STATE_FIELD>", "state") else: source = source.replace("<STATE_FIELD>", "__state") poll_id = [k for k, v in syscalls.items() if v == b"poll"][0] if options.syscall_events is None: syscall_trace_events = "false" elif options.syscall_events == 0: if not options.skip_syscall_poll_events: syscall_trace_events = "true" else: syscall_trace_events = f"args->id != {poll_id}" else: syscall_trace_events = "delta > {}".format(options.syscall_events) if options.skip_syscall_poll_events: syscall_trace_events += f" && args->id != {poll_id}" source = source.replace("<SYSCALL_TRACE_EVENTS>", syscall_trace_events) source = source.replace("<STACK_TRACE_SIZE>", str(options.stack_trace_size)) source = source.replace("<STACK_TRACE_ENABLED>", "true" if options.stack_trace_size > 0 else "false") # # Handle start/stop probes # if start_trigger: source = source.replace("<START_TRIGGER>", start_trigger.get_c_code( "start_trigger_probe", "return start_trigger();")) else: source = source.replace("<START_TRIGGER>", "") if stop_trigger: source = source.replace("<STOP_TRIGGER>", stop_trigger.get_c_code( "stop_trigger_probe", "return stop_trigger();")) else: source = source.replace("<STOP_TRIGGER>", "") # # Setup usdt or other probes that need handling trough the BFP class. # usdt = USDT(pid=int(options.pid)) try: if start_trigger and start_trigger.probe_type == "usdt": usdt.enable_probe(probe=start_trigger.probe_name(), fn_name="start_trigger_probe") if stop_trigger and stop_trigger.probe_type == "usdt": usdt.enable_probe(probe=stop_trigger.probe_name(), fn_name="stop_trigger_probe") except USDTException as e: print("ERROR: {}".format( (re.sub("^", " " * 7, str(e), flags=re.MULTILINE)).strip(). replace("--with-dtrace or --enable-dtrace", "--enable-usdt-probes"))) sys.exit(os.EX_OSERR) bpf = BPF(text=source, usdt_contexts=[usdt], debug=options.debug) if start_trigger: try: if start_trigger.probe_type == "uprobe": bpf.attach_uprobe(name=f"/proc/{options.pid}/exe", sym=start_trigger.probe_name(), fn_name="start_trigger_probe", pid=options.pid) if start_trigger.probe_type == "uretprobe": bpf.attach_uretprobe(name=f"/proc/{options.pid}/exe", sym=start_trigger.probe_name(), fn_name="start_trigger_probe", pid=options.pid) except Exception as e: print("ERROR: Failed attaching uprobe start trigger " f"'{start_trigger.probe_name()}';\n {str(e)}") sys.exit(os.EX_OSERR) if stop_trigger: try: if stop_trigger.probe_type == "uprobe": bpf.attach_uprobe(name=f"/proc/{options.pid}/exe", sym=stop_trigger.probe_name(), fn_name="stop_trigger_probe", pid=options.pid) if stop_trigger.probe_type == "uretprobe": bpf.attach_uretprobe(name=f"/proc/{options.pid}/exe", sym=stop_trigger.probe_name(), fn_name="stop_trigger_probe", pid=options.pid) except Exception as e: print("ERROR: Failed attaching uprobe stop trigger" f"'{stop_trigger.probe_name()}';\n {str(e)}") sys.exit(os.EX_OSERR) # # If no triggers are configured use the delay configuration # bpf["events"].open_ring_buffer(process_event) sample_count = 0 while sample_count < options.sample_count: sample_count += 1 syscall_events = [] if not options.start_trigger: print_timestamp("# Start sampling") start_capture() stop_time = -1 if options.stop_trigger else \ time_ns() + options.sample_time * 1000000000 else: # For start triggers the stop time depends on the start trigger # time, or depends on the stop trigger if configured. stop_time = -1 if options.stop_trigger else 0 while True: keyboard_interrupt = False try: last_start_ts = start_trigger_ts last_stop_ts = stop_trigger_ts if stop_time > 0: delay = int((stop_time - time_ns()) / 1000000) if delay <= 0: break else: delay = -1 bpf.ring_buffer_poll(timeout=delay) if stop_time <= 0 and last_start_ts != start_trigger_ts: print_timestamp( "# Start sampling (trigger@{})".format( start_trigger_ts)) if not options.stop_trigger: stop_time = time_ns() + \ options.sample_time * 1000000000 if last_stop_ts != stop_trigger_ts: break except KeyboardInterrupt: keyboard_interrupt = True break if options.stop_trigger and not capture_running(): print_timestamp("# Stop sampling (trigger@{})".format( stop_trigger_ts)) else: print_timestamp("# Stop sampling") if stop_trigger_ts != 0 and start_trigger_ts != 0: trigger_delta = stop_trigger_ts - start_trigger_ts else: trigger_delta = None if not trigger_delta or trigger_delta >= options.trigger_delta: stop_capture(force=True) # Prevent a new trigger to start. process_results(syscall_events=syscall_events, trigger_delta=trigger_delta) elif trigger_delta: sample_count -= 1 print_timestamp("# Sample dump skipped, delta {:,} ns".format( trigger_delta)) reset_capture() stop_capture() if keyboard_interrupt: break if options.sample_interval > 0: time.sleep(options.sample_interval) # # Report lost events. # dropcnt = bpf.get_table("dropcnt") for k in dropcnt.keys(): count = dropcnt.sum(k).value if k.value == 0 and count > 0: print("\n# WARNING: Not all events were captured, {} were " "dropped!\n# Increase the BPF ring buffer size " "with the --buffer-page-count option.".format(count)) if options.sample_count > 1: trigger_miss = bpf.get_table("trigger_miss") for k in trigger_miss.keys(): count = trigger_miss.sum(k).value if k.value == 0 and count > 0: print("\n# WARNING: Not all start triggers were successful. " "{} were missed due to\n# slow userspace " "processing!".format(count)) # # Start main() as the default entry point... # if __name__ == "__main__": main()