我们先将test.txt文件的内容清空,然后模拟去访问nginx
[root@localhost tmp]# cat test.txt {{EPOLLIN, {u32=1901031440, u64=140133798998032}}}, 512, 4294967295) = 1 3824 05:57:59.271342 accept4(6, {sa_family=AF_INET, sin_port=htons(64469), sin_addr=inet_addr("192.168.0.254")}, [16], SOCK_NONBLOCK) = 3 3824 05:57:59.271383 epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLET|0x2000, {u32=1901031888, u64=140133798998480}}) = 0 3824 05:57:59.271401 epoll_wait(8, {{EPOLLIN, {u32=1901031888, u64=140133798998480}}}, 512, 60000) = 1 3824 05:57:59.271423 recvfrom(3, "GET / HTTP/1.1\r\nHost: 192.168.0."..., 1024, 0, NULL, NULL) = 308 #recvfrom接收到get请求 3824 05:57:59.271483 stat("/usr/local/nginx/html/www/index.html", 0x7fff69246350) = -1 ENOENT (No such file or directory) #查看index.html文件不存在 3824 05:57:59.271506 stat("/usr/local/nginx/html/www", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 3824 05:57:59.271525 stat("/usr/local/nginx/html/www/index.htm", 0x7fff69246350) = -1 ENOENT (No such file or directory) #查看index.htm文件不存在 #下面向用户返回403错误,并写入错误日志 3824 05:57:59.271546 write(5, "2018/01/14 05:57:59 [error] 3824"..., 200) = 200 3824 05:57:59.271587 writev(3, [{"HTTP/1.1 403 Forbidden\r\nServer: "..., 155}, {"<html>\r\n<head><title>403 Forbidd"..., 116}, {"<hr><center>nginx/1.10.2</center"..., 53}], 3) = 324 3824 05:57:59.271674 write(4, "192.168.0.254 - - [14/Jan/2018:0"..., 151) = 151 3824 05:57:59.271693 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0 3824 05:57:59.271708 recvfrom(3, 0x1f3e190, 1024, 0, 0, 0) = -1 EAGAIN (Resource temporarily unavailable) 3824 05:57:59.271722 epoll_wait(8, #从上面的日志输出中,我们可以得知是因为2个文件不存在导致的403错误,因此我们检查配置文件就很容易发现问题。 server { listen 80; server_name ; location / { root html/www; index index.html index.htm; #这里缺少了设置首页文件 } }(2)只跟踪与文件操作有关的系统调用
如果命令结果的输出实在太多了,很容易看花眼,因此可以使用过滤器,过滤掉无关的信息,比如只查看文件操作信息。
[root@localhost tmp]# strace -tt -f -e trace=file -o /tmp/test.txt /usr/local/nginx/sbin/nginx [root@localhost tmp]# cat test.txt 3860 06:28:42.306924 stat("/usr/local/nginx/html/www/index.html", 0x7fff3ce39670) = -1 ENOENT (No such file or directory) 3860 06:28:42.306973 stat("/usr/local/nginx/html/www", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 3860 06:28:42.306994 stat("/usr/local/nginx/html/www/index.htm", 0x7fff3ce39670) = -1 ENOENT (No such file or directory)(3)通过pid跟踪进程
[root@localhost tmp]# /usr/local/nginx/sbin/nginx #启动nginx服务 [root@localhost tmp]# pgrep nginx -l 3873 nginx #nginx的master进程 3874 nginx #nginx的worker进程 [root@localhost tmp]# strace -tt -f -e trace=file -p 3874 #使用-p参数,只跟踪worker进程,结果更加精简 Process 3874 attached - interrupt to quit 06:49:38.629248 stat("/usr/local/nginx/html/www/index.html", 0x7fffbd4e7fc0) = -1 ENOENT (No such file or directory) 06:49:38.629329 stat("/usr/local/nginx/html/www", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 06:49:38.629353 stat("/usr/local/nginx/html/www/index.htm", 0x7fffbd4e7fc0) = -1 ENOENT (No such file or directory)(4)跟踪系统调用统计
strace不仅能够追踪系统调用,使用选项-c还能对进程所有的系统调用做一个统计分析
[root@localhost tmp]# strace -c /usr/local/nginx/sbin/nginx #使用-c参数为进程所有的系统调用做一个统计分析 % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 100.00 0.000047 47 1 clone 0.00 0.000000 0 29 read 0.00 0.000000 0 31 open 0.00 0.000000 0 33 close 0.00 0.000000 0 6 stat 0.00 0.000000 0 28 fstat 0.00 0.000000 0 1 lseek 0.00 0.000000 0 61 mmap 0.00 0.000000 0 36 mprotect 0.00 0.000000 0 7 munmap 0.00 0.000000 0 6 brk 0.00 0.000000 0 14 rt_sigaction 0.00 0.000000 0 1 rt_sigprocmask 0.00 0.000000 0 1 ioctl 0.00 0.000000 0 2 pread 0.00 0.000000 0 2 2 access 0.00 0.000000 0 5 socket 0.00 0.000000 0 4 4 connect 0.00 0.000000 0 1 bind 0.00 0.000000 0 2 listen 0.00 0.000000 0 1 setsockopt 0.00 0.000000 0 1 execve 0.00 0.000000 0 2 uname 0.00 0.000000 0 3 fcntl 0.00 0.000000 0 5 5 mkdir 0.00 0.000000 0 3 getrlimit 0.00 0.000000 0 1 geteuid 0.00 0.000000 0 1 statfs 0.00 0.000000 0 1 arch_prctl 0.00 0.000000 0 2 1 futex 0.00 0.000000 0 1 epoll_create 0.00 0.000000 0 1 set_tid_address 0.00 0.000000 0 1 set_robust_list ------ ----------- ----------- --------- --------- ---------------- 100.00 0.000047 294 12 total上面的结果将清楚地告诉我们调用了哪些系统函数,调用的次数是多少,消耗了多少时间等信息,这对我们分析程序来说是非常有用的。