path: root/src/agent.c
/* Copyright 2019 Red Hat

   Licensed under the Apache License, Version 2.0 (the "License"); you may
   not use this file except in compliance with the License. You may obtain
   a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
   License for the specific language governing permissions and limitations
   under the License. */

/* The goal of this program is to collect process information:
   per-tgid on-CPU time via a scheduler switch probe, and
   exec/fork/exit events via a perf output channel. */
#define BPF_LICENSE GPL
#include <linux/sched.h>

// Internal map recording, per task (pid), when it was last scheduled on CPU
BPF_HASH(start_time, u32, u64, PID_MAX);
// Map shared with userspace: accumulated on-CPU time (ns) per task group (tgid)
BPF_HASH(oncpus, u32, u64, PID_MAX);
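// Note: PID_MAX (maximum number of map entries) and INTERVAL_NS (the sampling
// interval, in nanoseconds) are not defined in this file; they are presumably
// injected by the userspace loader at compile time.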

/* Disabled alternative: a raw sched_switch tracepoint version of the probe below.
RAW_TRACEPOINT_PROBE(sched_switch)
{
  u64 cur_time = bpf_ktime_get_ns();
  struct task_struct *prev = (struct task_struct *)ctx->args[1];
  struct task_struct *next = (struct task_struct *)ctx->args[2];

  u32 pid;
  u32 tgid;
  bpf_probe_read(&pid, sizeof(prev->pid), &prev->pid);
  bpf_probe_read(&tgid, sizeof(prev->tgid), &prev->tgid);

  if (tgid) {
    u64 *prev_time = start_time.lookup(&pid);
    if (prev_time != NULL) {
      // Previous task start time was recorded, compute the time it spent oncpu
      u64 delta = (cur_time - *prev_time);
      if (delta > 0 && delta < INTERVAL_NS) {
        // Per tgid cpu info
        u64 *oncpu = oncpus.lookup(&tgid);
        if (oncpu != NULL) {
          delta += *oncpu;
        }
        // Record time per task group
        oncpus.update(&tgid, &delta);
      }
    }
  }

  // Record the start time of the next
  u32 next_pid;

  bpf_probe_read(&next_pid, sizeof(next->pid), &next->pid);
  cur_time = bpf_ktime_get_ns();
  start_time.update(&next_pid, &cur_time);
  return 0;
}
*/

// Called each time the scheduler completes a task switch (presumably attached
// as a kprobe on the kernel's finish_task_switch()).
int finish_task_switch(struct pt_regs *ctx, struct task_struct *prev)
{
  u64 cur_time = bpf_ktime_get_ns();
  u32 pid = prev->pid;
  u32 tgid = prev->tgid;
  if (tgid) {
    u64 *prev_time = start_time.lookup(&pid);
    if (prev_time != NULL) {
      // The previous task's start time was recorded; compute the time it spent on-CPU
      u64 delta = (cur_time - *prev_time);
      if (delta > 0 && delta < INTERVAL_NS) {
        // Per tgid cpu info
        u64 *oncpu = oncpus.lookup(&tgid);
        if (oncpu != NULL) {
          delta += *oncpu;
        }
        // Record time per task group
        oncpus.update(&tgid, &delta);
      }
    }
  }

  // Record the start time of the next task
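  // (finish_task_switch runs in the context of the task that has just been
  // switched in, so bpf_get_current_pid_tgid() identifies that next task.)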
  u32 next_pid = bpf_get_current_pid_tgid() & 0xffffffff;
  cur_time = bpf_ktime_get_ns();
  start_time.update(&next_pid, &cur_time);
  return 0;
}


// Perf output channel carrying process events (exec, args, fork, exit) to userspace
BPF_PERF_OUTPUT(execs);

#define MAXARGS  8
#define ARGSIZE  128

enum execs_perf_type {
                      EVENT_TYPE_INIT,
                      EVENT_TYPE_ARGS,
                      EVENT_TYPE_EXEC,
                      EVENT_TYPE_EXIT,
                      EVENT_TYPE_FORK,
};

struct exec_info_t {
  enum execs_perf_type type;
  u32 pid;
  u32 ppid;
  u32 cgroup;
  char arg[ARGSIZE];
};
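
/* Note: the cgroup field is overloaded: it carries the cgroup id for
   EVENT_TYPE_INIT, EVENT_TYPE_ARGS and EVENT_TYPE_FORK, the execve return
   value for EVENT_TYPE_EXEC, and the exit code for EVENT_TYPE_EXIT. A
   userspace reader has to mirror this struct layout to decode events. */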

static int submit_arg(struct pt_regs *ctx, void *ptr, struct exec_info_t *inf)
{
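  // Read one argv[] pointer; when it is non-NULL, copy the string it points
  // to into inf->arg and submit it on the perf channel. Returns 1 to keep
  // iterating and 0 once the end of argv (a NULL pointer) is reached.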
  const char *argp = NULL;
  bpf_probe_read(&argp, sizeof(argp), ptr);
  if (argp) {
    bpf_probe_read(&inf->arg, sizeof(inf->arg), argp);
    if (inf->arg[0]) {
      execs.perf_submit(ctx, inf, sizeof(struct exec_info_t));
    }
    return 1;
  }
  return 0;
}

int syscall__execve(struct pt_regs *ctx,
                    const char __user *filename,
                    const char __user *const __user *__argv,
                    const char __user *const __user *__envp)
{
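  // Entry probe for the execve syscall (presumably attached by the userspace
  // loader to the syscall entry): emits an INIT event carrying the filename,
  // then one ARGS event per argument.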
  // Send initial info
  struct task_struct *tsk = (struct task_struct *)bpf_get_current_task();
  struct exec_info_t inf = {};
  inf.type = EVENT_TYPE_INIT;
  inf.pid = tsk->tgid;
  inf.ppid = tsk->real_parent->tgid;
  inf.cgroup = bpf_get_current_cgroup_id() & 0xffffffff;
  bpf_probe_read(inf.arg, sizeof(inf.arg), filename);
  execs.perf_submit(ctx, &inf, sizeof(inf));

  // Send argv
  inf.type = EVENT_TYPE_ARGS;
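  // Start at argv[1]: argv[0] (normally the program path) duplicates the
  // filename already sent with the INIT event. At most MAXARGS - 1 arguments
  // are forwarded; anything beyond that is dropped.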
#pragma unroll
  for (int i = 1; i < MAXARGS; i++) {
    if (submit_arg(ctx, (void *)&__argv[i], &inf) == 0)
      break;
  }
  return 0;
}

int do_ret_sys_execve(struct pt_regs *ctx)
{
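  // Return probe for the execve syscall: reports whether the exec succeeded.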
  struct task_struct *tsk = (struct task_struct *)bpf_get_current_task();
  struct exec_info_t inf = {};
  inf.pid = tsk->tgid;
  inf.ppid = tsk->real_parent->tgid;
  inf.type = EVENT_TYPE_EXEC;
  // EVENT_TYPE_EXEC stores the execve return value (the exec success status) in the cgroup field
  inf.cgroup = PT_REGS_RC(ctx);
  execs.perf_submit(ctx, &inf, sizeof(inf));
  return 0;
}

/* Disabled experiment: sched_process_exec tracepoint to also collect threads
TRACEPOINT_PROBE(sched, sched_process_exec)
//  int syscall__clone(struct pt_regs *ctx) {
{
  struct task_struct *tsk = (struct task_struct *)bpf_get_current_task();
  struct exec_info_t inf = {};
  inf.type = EVENT_TYPE_INIT;
  inf.pid = tsk->tgid; //bpf_get_current_pid_tgid() >> 32;
  //inf.pid = tsk->pidPT_REGS_RC(ctx);
  inf.ppid = tsk->real_parent->tgid;
  inf.cgroup = bpf_get_current_cgroup_id() & 0xffffffff;
  bpf_get_current_comm(&inf.arg, sizeof(inf.arg));
  execs.perf_submit(args, &inf, sizeof(inf));
  return 0;
}
*/

TRACEPOINT_PROBE(sched, sched_process_fork)
{
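  // Fork tracepoint: fires for every new task (process or thread) and reports
  // the child pid together with its parent pid.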
  struct exec_info_t inf = {};
  inf.type = EVENT_TYPE_FORK;
  inf.pid = args->child_pid;
  inf.ppid = args->parent_pid;
  inf.cgroup = bpf_get_current_cgroup_id() & 0xffffffff;
  execs.perf_submit(args, &inf, sizeof(inf));
  return 0;
}

TRACEPOINT_PROBE(sched, sched_process_exit)
{
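  // Exit tracepoint: reports the exit of a whole process (thread group
  // leader) with its exit code and final comm; thread exits are filtered out
  // below.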
  struct task_struct *tsk = (struct task_struct *)bpf_get_current_task();
  if (tsk->pid != tsk->tgid) {
    // A non-leader thread exited; only whole-process exits are reported
    return 0;
  }
  struct exec_info_t inf = {};
  inf.pid = tsk->tgid;
  inf.type = EVENT_TYPE_EXIT;
  inf.ppid = tsk->real_parent->tgid;
  // EVENT_TYPE_EXIT stores the exit code in the cgroup field. The kernel packs
  // the status as (code << 8) | signal in exit_code, so shifting right by 8
  // recovers the value passed to exit().
  inf.cgroup = tsk->exit_code >> 8;
  bpf_get_current_comm(&inf.arg, sizeof(inf.arg));
  execs.perf_submit(args, &inf, sizeof(inf));
  return 0;
}