用例
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
| package main
import ( "fmt" "log" "sync" "time" )
type ID struct { id int token string }
type IDStorage struct { name string storage map[int]*ID }
func (s *IDStorage) push(id int) { s.storage[id] = &ID{ id: id, token: fmt.Sprintf("id:%d",id), } }
func main() { reqCh := make(chan struct {}) rspCh := make(chan struct {}) quitCh := make(chan struct {}) var lock sync.RWMutex
go func() { <- reqCh lock.RLock() rspCh <- struct{}{} lock.RUnlock() }() go func() { lock.Lock() reqCh <- struct{}{} <- rspCh lock.Unlock() quitCh <- struct{}{} }()
ticker := time.NewTicker(2 * time.Second) ids := &IDStorage{name:"storage", storage:make(map[int]*ID)} go func() { i := 1 for range ticker.C { ids.push(i) i++ log.Printf("keep alive tick %d.", len(ids.storage)) } }() <- quitCh }
|
编译
go build -gcflags '-N -l'
dlv调试流程
1. 找出启动进程pid
2. dlv attach pid
3. grs查看所有goroutines
(dlv) grs
Goroutine 1 - User: /root/mycodes/leetcode/testdlv.go:57 main.main (0x49d71f)
Goroutine 2 - User: /usr/lib/golang/src/runtime/proc.go:305 runtime.gopark (0x431de0)
Goroutine 3 - User: /usr/lib/golang/src/runtime/proc.go:305 runtime.gopark (0x431de0)
Goroutine 4 - User: /usr/lib/golang/src/runtime/proc.go:305 runtime.gopark (0x431de0)
Goroutine 5 - User: /usr/lib/golang/src/runtime/proc.go:305 runtime.gopark (0x431de0)
Goroutine 6 - User: /usr/lib/golang/src/runtime/sema.go:71 sync.runtime_SemacquireMutex (0x4403a7)
Goroutine 7 - User: /root/mycodes/leetcode/testdlv.go:42 main.main.func2 (0x49d853)
Goroutine 8 - User: /root/mycodes/leetcode/testdlv.go:51 main.main.func3 (0x49d91e)
根据每个协程的行号提示,可以看出goroutine 1就是执行main函数的协程,
goroutine 7 是执行lock.Lock()的协程
goroutine 8 是执行tick定期打印的协程
goroutine 6 虽然无法直接看出与执行lock.RLock()有关联,但排除了2、3、4、5这4个golang系统调度的协程,也可以推测出. 实际上6正是一直等待锁的协程,我们可以切换到6号协程验证下:
(dlv) gr 6
Switched from 1 to 6 (thread 7936)
(dlv) bt
0 0x0000000000431de0 in runtime.gopark
at /usr/lib/golang/src/runtime/proc.go:305
1 0x0000000000440640 in runtime.goparkunlock
at /usr/lib/golang/src/runtime/proc.go:310
2 0x0000000000440640 in runtime.semacquire1
at /usr/lib/golang/src/runtime/sema.go:144
3 0x00000000004403a7 in sync.runtime_SemacquireMutex
at /usr/lib/golang/src/runtime/sema.go:71
4 0x000000000046cce7 in sync.(*RWMutex).RLock
at /usr/lib/golang/src/sync/rwmutex.go:50
5 0x000000000049d7a7 in main.main.func1
at /root/mycodes/leetcode/testdlv.go:35
6 0x000000000045c1c1 in runtime.goexit
at /usr/lib/golang/src/runtime/asm_amd64.s:1373
通过栈帧调用可以看出正是goroutine 6发生了死锁等待.
这里总结一个经验: 一旦程序长时间阻塞,并且cpu负载很低,大概率是发生了死锁,
这时只要查看所有的协程中是否有处于调用sync.runtime_SemacquireMutex阻塞的(sync.Mutex底层是通过信号量实现)
实际上这个用例中最后一个定期tick的协程是故意加上去的,因为一旦所有协程都阻塞,
golang会自动检测出死锁.
现在切回执行main函数的协程:
(dlv) gr 1
Switched from 6 to 1 (thread 7936)
(dlv) bt
0 0x0000000000431de0 in runtime.gopark
at /usr/lib/golang/src/runtime/proc.go:305
1 0x0000000000405557 in runtime.chanrecv
at /usr/lib/golang/src/runtime/chan.go:525
2 0x000000000040521b in runtime.chanrecv1
at /usr/lib/golang/src/runtime/chan.go:407
3 0x000000000049d71f in main.main
at /root/mycodes/leetcode/testdlv.go:57
4 0x0000000000431a12 in runtime.main
at /usr/lib/golang/src/runtime/proc.go:203
5 0x000000000045c1c1 in runtime.goexit
at /usr/lib/golang/src/runtime/asm_amd64.s:1373
(dlv) locals
mp = (unreadable could not find loclist entry at 0x39948 for address 0x431de0)
gp = (unreadable could not find loclist entry at 0x3997b for address 0x431de0)
status = (unreadable could not find loclist entry at 0x399ae for address 0x431de0)
观察到<- quitCh在源文件57行,处于3层栈帧,切换验证下
(dlv) frame 3
> runtime.gopark() /usr/lib/golang/src/runtime/proc.go:305 (PC: 0x431de0)
Warning: debugging optimized function
Frame 3: /root/mycodes/leetcode/testdlv.go:57 (PC: 49d71f)
52: ids.push(i)
53: i++
54: log.Printf("keep alive tick %d.", len(ids.storage))
55: }
56: }()
=> 57: <- quitCh
58: }
查看main函数所有栈上变量
(dlv) locals
reqCh = chan struct {} 0/0
rspCh = chan struct {} 0/0
quitCh = chan struct {} 0/0
lock = sync.RWMutex {w: (*sync.Mutex)(0xc000014100), writerSem: 0, readerSem: 0,...+2 more}
ticker = (*time.Ticker)(0xc0000620a0)
ids = (*main.IDStorage)(0xc00000c080)
具体看下ids这个变量的结构
(dlv) p ids
*main.IDStorage {
name: "storage",
storage: map[int]*main.ID [
1: *(*main.ID)(0xc000108000),
2: *(*main.ID)(0xc000108020),
3: *(*main.ID)(0xc000108040),
4: *(*main.ID)(0xc000108060),
5: *(*main.ID)(0xc000108080),
],}
(dlv) p ids.storage[1]
*main.ID {id: 1, token: "id:1"}
(dlv) p ids.storage[2]
*main.ID {id: 2, token: "id:2"}
上面通过locals查看到的ids = (*main.IDStorage)(0xc00000c080)其实是ids这个指针变量的内容,即分配的main.IDStorage内存块的首地址
我们来验证下:
(dlv) p &ids.name
(*string)(0xc00000c080)
可以看到这2个值都是0xc00000c080,因此结论是正确的
ps: 有些时候locals或者vars看不到某些指针变量存储内存的实际地址,
通过p &xxx.first_field_name是一种方式.
有时想查看某个goroutine的堆栈或具体内容,又不想切来切去可以这样(以7号协程为例):
(dlv) gr 7 bt
0 0x0000000000431de0 in runtime.gopark
at /usr/lib/golang/src/runtime/proc.go:305
1 0x0000000000405557 in runtime.chanrecv
at /usr/lib/golang/src/runtime/chan.go:525
2 0x000000000040521b in runtime.chanrecv1
at /usr/lib/golang/src/runtime/chan.go:407
3 0x000000000049d853 in main.main.func2
at /root/mycodes/leetcode/testdlv.go:42
4 0x000000000045c1c1 in runtime.goexit
at /usr/lib/golang/src/runtime/asm_amd64.s:1373
(dlv) gr 7 frame 3
> runtime.gopark() /usr/lib/golang/src/runtime/proc.go:305 (PC: 0x431de0)
Warning: debugging optimized function
Frame 3: /root/mycodes/leetcode/testdlv.go:42 (PC: 49d853)
37: lock.RUnlock()
38: }()
39: go func() {
40: lock.Lock()
41: reqCh <- struct{}{}
=> 42: <- rspCh
43: lock.Unlock()
44: quitCh <- struct{}{}
45: }()
46:
47: ticker := time.NewTicker(2 * time.Second)
这样选择的协程则不会发生切换.