Crash工具实战-结构体解析
系统崩溃,死机,卡顿等问题经常遇到,但又很棘手,这里推荐个分析神器。
Crash 工具用于解析 kdump 抓取的 vmcore 信息。如之前分析,vmcore 实际为系统运行当时的内存镜像,其中包括了所有在内存中可以看到的信息,通过 Crash 工具可以解析 vmcore 中的详细数据。本文主要以 sk_buff 数据结构为例,简单说明 Crash 工具中对结构体的解析。
基本用法
Crash中使用struct命令解析结构体,具体用法为:
[struct] <结构体名称> <结构体虚拟地址>
其中,开头的 struct 关键字可以省略,直接以结构体名称作为命令即可。
示例
启动crash
crash vmlinux vmcore
找到sk_buff结构体地址
bt 命令查看当前上下文的寄存器和堆栈信息。
crash> bt
PID: 27528 TASK: ffff88108e1d00c0 CPU: 6 COMMAND: "ZMSSMediaProces"
#0 [ffff88106f035740] machine_kexec at ffffffff8103237b
#1 [ffff88106f0357a0] crash_kexec at ffffffff810ba552
#2 [ffff88106f035870] oops_end at ffffffff814fc6c0
#3 [ffff88106f0358a0] die at ffffffff8100f31f
#4 [ffff88106f0358d0] do_general_protection at ffffffff814fc242
#5 [ffff88106f035900] general_protection at ffffffff814fba15
[exception RIP: put_page+9]
RIP: ffffffff8112d3a9 RSP: ffff88106f0359b8 RFLAGS: 00010206
RAX: 0000000000000030 RBX: 0000000000000001 RCX: ffff881161aa4800
RDX: ffff881161aa4e00 RSI: ffff88114b4f0c08 RDI: 4002030400585a00
RBP: ffff88106f0359b8 R8: ffff88106f0358b4 R9: 00000000fffffff2
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8810dd32f280
R13: 00000000000005dc R14: ffff88106f035f18 R15: ffffffff81470720
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#6 [ffff88106f0359c0] skb_release_data at ffffffff8142c77f
#7 [ffff88106f0359e0] __kfree_skb at ffffffff8142c2fe
#8 [ffff88106f035a00] kfree_skb at ffffffff8142c442
#9 [ffff88106f035a30] __ip_flush_pending_frames at ffffffff814706e3
#10 [ffff88106f035a50] ip_flush_pending_frames at ffffffff8147071c
#11 [ffff88106f035a60] udp_flush_pending_frames at ffffffff81495be0
#12 [ffff88106f035a70] udp_sendmsg at ffffffff814960f6
#13 [ffff88106f035b70] inet_sendmsg at ffffffff8149e34a
#14 [ffff88106f035bb0] sock_sendmsg at ffffffff81424b0a
#15 [ffff88106f035d60] __sys_sendmsg at ffffffff81424fbf
#16 [ffff88106f035f10] sys_sendmsg at ffffffff81425c79
#17 [ffff88106f035f80] system_call_fastpath at ffffffff8100b0f2
RIP: 000000385ea0eadd RSP: 00007fa8e25cebd0 RFLAGS: 00010202
RAX: 000000000000002e RBX: ffffffff8100b0f2 RCX: 00007fa8e25cfa0f
RDX: 0000000000008000 RSI: 00007fa8e25ce6d0 RDI: 0000000000000538
RBP: 00007fa66c18c500 R8: 000000000000001c R9: 00007fa8e25cf700
R10: 00007fa8e25ce6d0 R11: 0000000000000293 R12: 0000000000000000
R13: 00007fa8e25ce9c0 R14: 00007fa8e25ced40 R15: 00007fa8e25ce9c0
ORIG_RAX: 000000000000002e CS: 0033 SS: 002b
反汇编,确认 skb 数据结构的地址:
crash> dis -l skb_release_data
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/core/skbuff.c: 340
0xffffffff8142c700 <skb_release_data>: push %rbp
0xffffffff8142c701 <skb_release_data+1>: mov %rsp,%rbp
0xffffffff8142c704 <skb_release_data+4>: push %r12
0xffffffff8142c706 <skb_release_data+6>: push %rbx
0xffffffff8142c707 <skb_release_data+7>: nopl 0x0(%rax,%rax,1)
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/core/skbuff.c: 341
0xffffffff8142c70c <skb_release_data+12>: movzbl 0x7c(%rdi),%eax
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/core/skbuff.c: 340
0xffffffff8142c710 <skb_release_data+16>: mov %rdi,%r12
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/core/skbuff.c: 341
0xffffffff8142c713 <skb_release_data+19>: test $0x2,%al
0xffffffff8142c715 <skb_release_data+21>: je 0xffffffff8142c742 <skb_release_data+66>
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/core/skbuff.c: 342
结合代码,可以确认 skb 参数由 rdi 寄存器传入后被保存到了 r12 寄存器(mov %rdi,%r12),因此 r12 寄存器中保存的即为 skb 变量的地址,地址为:ffff8810dd32f280
通过变量地址打印结构体内容
crash> sk_buff ffff8810dd32f280
struct sk_buff {
next = 0x0,
prev = 0x0,
sk = 0xffff88114b4f0b40,
tstamp = {
tv64 = 0
},
dev = 0x0,
_skb_dst = 0,
sp = 0x0,
cb = "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
struct sk_buff {
next = 0x0,
prev = 0x0,
sk = 0xffff88114b4f0b40,
tstamp = {
tv64 = 0
},
dev = 0x0,
_skb_dst = 0,
sp = 0x0,
cb = "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
len = 1500,
data_len = 0,
mac_len = 0,
hdr_len = 0,
{
csum = 1527250250,
{
csum_start = 64842,
csum_offset = 23303
}
},
priority = 0,
flags1_begin = 0xffff8810dd32f2fc,
local_df = 0 '\000',
cloned = 0 '\000',
ip_summed = 0 '\000',
nohdr = 0 '\000',
nfctinfo = 0 '\000',
pkt_type = 0 '\000',
fclone = 0 '\000',
ipvs_property = 0 '\000',
peeked = 0 '\000',
nf_trace = 0 '\000',
protocol = 0,
...
transport_header = 76,
network_header = 16,
mac_header = 4294967295,
tail = 1516,
end = 1536,
head = 0xffff881161aa4800 "",
data = 0xffff881161aa4810 "",
truesize = 1768,
users = {
counter = 1
}
通过 sk_buff 结构体内容,确认 head 和 data 指针,通过 x 命令查看地址内容,其中 16xg 表示以十六进制(x)格式显示 16 个 8 字节(g,giant word)单元的数据
crash> x/16xg 0xffff881161aa4800
0xffff881161aa4800: 0x0000000000000000 0x0008000000000000
0xffff881161aa4810: 0x0000000000000000 0x884509ff00000000
0xffff881161aa4820: 0x113d000001006005 0x007fe200f20a27ed
0xffff881161aa4830: 0x4c0508a930310900 0xb1d2745121900000
0xffff881161aa4840: 0x585a547439439fff 0x0000004b02e30400
0xffff881161aa4850: 0x4002030400585a00 0x017306e8a8ed53c9
0xffff881161aa4860: 0xca21900000a40559 0x0000004b5f2b1275
0xffff881161aa4870: 0x4002830400585a00 0x017306e8a8ed53c9
通过 sk_buff 的 head 和 end 成员,确认 skb_shared_info 结构的内容(该结构存放于 end 之后),即其地址为 0xffff881161aa4800+1536(end)=0xffff881161aa4E00
crash> skb_shared_info 0xffff881161aa4E00
struct skb_shared_info {
dataref = {
counter = 10749386
},
nr_frags = 36864,
gso_size = 51745,
gso_segs = 4753,
gso_type = 33323,
ip6_frag_id = 203,
tx_flags = {
{
hardware = 0 '\000',
software = 0 '\000',
in_progress = 0 '\000',
reserved = 0 '\000',
dev_zerocopy = 0 '\000'
},
flags = 0 '\000'
},
frag_list = 0x17306e8a8ed53c9,
hwtstamps = {
hwtstamp = {
tv64 = -3881663074131507766
},
syststamp = {
tv64 = 650841363090
}
},
...
其实可以明显看出,上述示例中的 skb_shared_info 结构的数据不对,因为这个数据区被踩坏了,本示例就是在分析内存被踩的问题。
通过 sk_buff 查看 sock 结构内容,可以直接通过 sk_buff->sk 指针,从上面的 sk_buff 结构内容看,其值为 0xffff88114b4f0b40
crash> sock 0xffff88114b4f0b40
struct sock {
__sk_common = {
{
skc_node = {
next = 0xffff8810debddb80,
pprev = 0xffff881204a84ec0
},
skc_nulls_node = {
next = 0xffff8810debddb80,
pprev = 0xffff881204a84ec0
}
},
skc_refcnt = {
counter = 2
},
skc_hash = 33728,
skc_family = 2,
skc_state = 1 '\001',
skc_reuse = 1 '\001',
skc_bound_dev_if = 0,
skc_bind_node = {
next = 0x0,
pprev = 0x0
},
...
sk_write_queue = {
next = 0xffff8810d3f26780,
prev = 0xffff8810d3f26780,
qlen = 1,
lock = {
raw_lock = {
slock = 0
}
}
},
通过 sock 结构,查看 sock 发送队列中的所有 sk_buff,可以通过 sock->sk_write_queue.next 和 sock->sk_write_queue.prev 查看:
crash> sk_buff 0xffff8810d3f26780
struct sk_buff {
next = 0xffff88114b4f0c08,
prev = 0xffff88114b4f0c08,
sk = 0xffff88114b4f0b40,
tstamp = {
tv64 = 0
},
dev = 0x0,
_skb_dst = 0,
sp = 0x0,
cb = "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
len = 1500,
data_len = 1432,
mac_len = 0,
hdr_len = 0,
【转自http://blog.chinaunix.net/uid-14528823-id-4432077.html】
10T 技术资源大放送!包括但不限于:Linux、虚拟化、容器、云计算、网络、Python、Go 等。在公众号内回复「10T」,即可免费获取!
推荐阅读:
shell编程100例(附PDF下载)
IPv6技术白皮书(附PDF下载)
Linux主流发行版本配置IP总结(Ubuntu、CentOS、Redhat、Suse)
批量安装Windows系统
无人值守批量安装服务器
运维必备的《网络端口大全》,看这一份就够了。
收藏:服务器和存储知识入门
什么叫SSH?原理详解,看这一篇就够了!
Nginx面试40问(收藏吃灰)
20 个 Linux 服务器性能调优技巧
超详细!一文带你了解LVS四层负载均衡企业级实践!
收藏 | Linux系统日志位置及包含的日志内容介绍
100 道 Linux 常见面试题,建议收藏,慢慢读~
服务器12种基本故障+排查方法
IT运维管理常用工具大全,让你成为真正的高手
什么是QoS?有收获,点个在看