前段時間遇到一個挺有意思的問題:崩潰高概率集中在某個呼叫堆疊過程中,或是在 GC 過程中 verify 失敗、發現存在壞根。經過幾天的調試後,才鎖定是編譯優化的問題:以 Nterp 解釋運行時緩存了位元組碼,出現幽靈呼叫函數。
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x0000000000000018
Cause: null pointer dereference
x0 0000000000000000 x1 0000000002e62ec8 x2 0000000000000000 x3 0000000072223650
x4 0000007c3ec13000 x5 3b7463656a624f2f x6 3b7463656a624f2f x7 0000007bbdababac
x8 0000000000000002 x9 2542ebd30d0dfceb x10 0000000000000000 x11 0000000000000002
x12 00000000af950a08 x13 b400007d15e5fa50 x14 0000007f1598f880 x15 0000007bbd9446e8
x16 0000007fea726e40 x17 0000000000000020 x18 0000007f15ca0000 x19 b400007d55e10be0
x20 0000000000000000 x21 b400007d55e10ca0 x22 0000000002d51610 x23 0000000002e61b08
x24 0000000000000005 x25 0000000000000002 x26 0000000002e62ec8 x27 0000000000000002
x28 00000000031b1f38 x29 00000000ffffffff
lr 00000000721b63c8 sp 0000007fea728e90 pc 00000000721b63d4 pst 0000000080001000
101 total frames
backtrace:
#00 pc 00000000008123d4 /system/framework/arm64/boot-framework.oat (android.view.ViewGroup.jumpDrawablesToCurrentState+132)
#01 pc 00000000007ca6c8 /system/framework/arm64/boot-framework.oat (android.view.View.onDetachedFromWindowInternal+472)
#02 pc 00000000007bb8d0 /system/framework/arm64/boot-framework.oat (android.view.View.dispatchDetachedFromWindow+288)
#03 pc 000000000080c444 /system/framework/arm64/boot-framework.oat (android.view.ViewGroup.dispatchDetachedFromWindow+484)
#04 pc 000000000080c34c /system/framework/arm64/boot-framework.oat (android.view.ViewGroup.dispatchDetachedFromWindow+236)
#05 pc 000000000080c34c /system/framework/arm64/boot-framework.oat (android.view.ViewGroup.dispatchDetachedFromWindow+236)
...
verification.cc:124] GC tried to mark invalid reference 0x20052a8
verification.cc:124] ref=0x20052a8 klass=0x0 <invalid address> space=main space (region space) 0x2000000-0x42000000 card=0 adjacent_ram=0000000000000000 0000000000000000 0000000000000000 0000000000000000 |0000000000000000 0000000000000000 0000000000000000 0000000000000000
verification.cc:124] holder=0x3ac8698 klass=0x20052a8 <invalid address> space=main space (region space) 0x2000000-0x42000000 card=0 adjacent_ram=9d73f26e02aea3e0 0000000002a33738 1000000002a38c48 0000000002a33738 |bd4f9b6d020052a8 0000000000000000 0000000000000000 0000000000000000
verification.cc:124] reference addr adjacent_ram=9d73f26e02aea3e0 0000000002a33738 1000000002a38c48 0000000002a33738 |bd4f9b6d020052a8 0000000000000000 0000000000000000 0000000000000000 0xb400007a5760fdb0 main space (region space) 0x2000000-0x42000000
復現抓取了對應錯誤 core 文件進行分析,該類型問題利用 core-parser 解析能力是最佳的。
core-parser> bt
Switch oat version(259) env.
"main" sysTid=7943 Runnable
| group="main" daemon=0 prio=5 target=0x0 uncaught_exception=0x0
| tid=1 sCount=0 flags=0 obj=0x7370c3e0 self=0xb400007a09207010 env=0xb400007a4920dd50
| stack=0x7fc3918000-0x7fc391a000 stackSize=0x7ff000 handle=0x7c428fa098
| mutexes=0xb400007a092077b0 held="mutator lock"(shared held)
x0 0x0000000000000000 x1 0x00000000027e6d40 x2 0x0000000000000000 x3 0x0000000072fc3650
x4 0x0000007963213000 x5 0x000000006576696c x6 0x000000006576696c x7 0x000000794915fad4
x8 0x0000000000000002 x9 0x2e157e9e8ba7cb87 x10 0x0000000000000000 x11 0x0000000000000001
x12 0x00000000b06119e0 x13 0x000000000000005a x14 0x0000007c4156f880 x15 0x0000007948fa193c
x16 0x0000007fc410ff90 x17 0x0000000000000020 x18 0x0000007c41ae8000 x19 0xb400007a09207010
x20 0x0000000000000000 x21 0xb400007a092070d0 x22 0x00000000026cefa0 x23 0x00000000027e5980
x24 0x0000000000000005 x25 0x0000000000000002 x26 0x00000000027e6d40 x27 0x00000000027e2478
x28 0x00000000026cc620 fp 0x00000000027df8f0
lr 0x0000000072f563c8 sp 0x0000007fc4111fe0 pc 0x0000000072f563d4 pst 0x0000000080001000
Native: #0 0000000072f563d4
Native: #1 0000000072f563c4
ManagedStack* 0xb400007a092070b8 maybe invalid.
JavaKt: #0 0000000000000000 android.view.View[].
JavaKt: #1 0000000000000000
JavaKt: #2 0000000000000000 java.lang.reflect.Method.invoke
JavaKt: #3 000000795f4796fe com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run
JavaKt: #4 000000795f47e568 com.android.internal.os.ZygoteInit.main
該堆疊輸出失敗是因為在 oat 上報錯,ManagedStack 還未保存這一幀,因此需要進行假幀處理。
core-parser> fake stack --sp 0x0000007fc4111fe0 --pc 0x0000000072f563d4
core-parser> bt
"main" sysTid=7943 Runnable
| group="main" daemon=0 prio=5 target=0x0 uncaught_exception=0x0
| tid=1 sCount=0 flags=0 obj=0x7370c3e0 self=0xb400007a09207010 env=0xb400007a4920dd50
| stack=0x7fc3918000-0x7fc391a000 stackSize=0x7ff000 handle=0x7c428fa098
| mutexes=0xb400007a092077b0 held="mutator lock"(shared held)
x0 0x0000000000000000 x1 0x00000000027e6d40 x2 0x0000000000000000 x3 0x0000000072fc3650
x4 0x0000007963213000 x5 0x000000006576696c x6 0x000000006576696c x7 0x000000794915fad4
x8 0x0000000000000002 x9 0x2e157e9e8ba7cb87 x10 0x0000000000000000 x11 0x0000000000000001
x12 0x00000000b06119e0 x13 0x000000000000005a x14 0x0000007c4156f880 x15 0x0000007948fa193c
x16 0x0000007fc410ff90 x17 0x0000000000000020 x18 0x0000007c41ae8000 x19 0xb400007a09207010
x20 0x0000000000000000 x21 0xb400007a092070d0 x22 0x00000000026cefa0 x23 0x00000000027e5980
x24 0x0000000000000005 x25 0x0000000000000002 x26 0x00000000027e6d40 x27 0x00000000027e2478
x28 0x00000000026cc620 fp 0x00000000027df8f0
lr 0x0000000072f563c8 sp 0x0000007fc4111fe0 pc 0x0000000072f563d4 pst 0x0000000080001000
Native: #0 0000000072f563d4
Native: #1 0000000072f563c4
JavaKt: #0 0000000000000000 android.view.View[].
JavaKt: #1 0000000000000000
JavaKt: #2 0000000000000000 java.lang.reflect.Method.invoke
JavaKt: #3 000000795f4796fe com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run
JavaKt: #4 000000795f47e568 com.android.internal.os.ZygoteInit.main
core-parser> f 0
JavaKt: #00 000000795fe114ac android.view.ViewGroup.jumpDrawablesToCurrentState()
{
Location: /system/framework/framework.jar!classes4.dex
art::ArtMethod: 0x71a2c858
dex_pc_ptr: 0x795fe114ac
quick_frame: 0x7fc4111fe0
frame_pc: 0x72f563d4
method_header: 0x72f5634c
DEX CODE:
0x795fe114a2: 0212 | const/4 v2, #+0
0x795fe114a4: 1235 000a | if-ge v2, v1, 0x795fe114b8 //+10
0x795fe114a8: 0346 0200 | aget-object v3, v0, v2
0x795fe114ac: 106e 80d3 0003 | invoke-virtual {v3}, void android.view.View.jumpDrawablesToCurrentState() //
...
OAT CODE:
0x72f563b0: 6b18033f | cmp w25, w24
0x72f563b4: 540001aa | b.ge 0x72f563e8
0x72f563b8: 110032e0 | add w0, w23, #0xc
0x72f563bc: 1000007e | adr x30, 0x72f563c8
0x72f563c0: b59da314 | cbnz x20, 0x72e91820
0x72f563c4: b8797801 | ldr w1, [x0, x25, lsl #2]
0x72f563c8: aa0103fa | mov x26, x1
0x72f563cc: b9400020 | ldr w0, [x1]
0x72f563d0: f949c000 | ldr x0, [x0, #0x1380]
0x72f563d4: f9400c1e | ldr x30, [x0, #0x18]
0x72f563d8: d63f03c0 | blr x30
0x72f563dc: 11000739 | add w25, w25, #1
}
可以看到當前錯誤發生在 invoke-virtual jumpDrawablesToCurrentState 的過程當中。
core-parser> vtor 0x00000000027e6d40
*
VIRTUAL: 0x27e6d40
*
OFFSET: 0x7e6d40
*
OR: 0x78f506c6bd40
*
MMAP: 0x0
*
OVERLAY: 0x0
[2000000, 42000000) rw- 0040000000 0040000000 [anon:dalvik-main space (region space)] [*]
可見地址範圍是落在 Java 堆上,p 指令輸出下該對象信息。
core-parser> p 0x00000000027e6d40
ERROR: Size: 0x0
core-parser> rd 0x00000000027e6d40 -e 0x00000000027e6e40
27e6d40: 8adbc7e1b0000888 0000000000000000 ................
27e6d50: 0000000000000000 0000000000000000 ................
27e6d60: 0000000000000000 0000000000000000 ................
27e6d70: 0000000000000000 0000000000000000 ................
27e6d80: 0000000000000000 0000000000000000 ................
27e6d90: 028c86b000000000 0000000000000000 ................
27e6da0: 00000000026ccfd8 0000000000000000 ..l.............
27e6db0: 0000000071199090 028c86c800000000 ...q............
27e6dc0: 0000000000000000 0000000000000000 ................
27e6dd0: 028c8718028c86e8 028c8760028c8730 ........0...`...
27e6de0: 028c8798028c8778 0000000000000000 x...............
27e6df0: 0000000000000000 028c87e8028c87b0 ................
27e6e00: 00000000028c8860 028c887800000000 `...........x...
27e6e10: 0000000000000000 711a35f800000000 .............5.q
27e6e20: 026cefa000000000 0000000000000000 ......l.........
27e6e30: 0000000000000000 028c889000000000 ................
可見該地址是個壞根,非Java對象地址,由於 b0000888 落在有效內存段上。
core-parser> vtor b0000888
*
VIRTUAL: 0xb0000888
*
OFFSET: 0x1ef4888
*
OR: 0x75b21f265888
*
MMAP: 0x0
*
OVERLAY: 0x0
[ae10c000, b010c000) r-x 0002000000 0002000000 /memfd:jit-cache (deleted) [*]
core-parser> rd 0xb0001c08
b0001c08: 0000000000000000 .......
因此在 0x72f563d4: f9400c1e | ldr x30, [x0, #0x18] 上才出現錯誤。
core-parser> class android.view.View -m
[0x710274a8]
public class android.view.View extends java.lang.Object {
// Methods:
...
[0x71a15d60] public void android.view.View.jumpDrawablesToCurrentState()
...
0x710274a8 + 0x1380 = 0x71028828
core-parser> rd 0x71028828
71028828: 0000000071a15d60 `].q....
當能正確獲取 view klass 地址,程序是正確的。
core-parser> space -c
ERROR: Region:[0x27e6d40, 0x27e7100) main space (region space) has bad object!!
core-parser> rd 0x27e6c40 -e 0x27e6d40
27e6c40: 1800001000000000 0000000000000001 ................
27e6c50: 4000000000000000 0000000000000000 .......@........
27e6c60: 40a00000bf800000 bf8000003f800000 .......@...?....
27e6c70: 0000000000000000 0000000000000000 ................
27e6c80: 7fc000007fc00000 7fc0000000000000 ................
27e6c90: 000000003f800000 0000010100000001 ...?............
27e6ca0: 0000000000000000 0000000000000000 ................
27e6cb0: 0000000100000000 0100000000000000 ................
27e6cc0: 0000000000000000 028c8de0027e69a8 .........i~.....
27e6cd0: 028c8e00028c8df0 0000000000000000 ................
27e6ce0: 028c8eb000000000 0000000005aefcf5 ................
27e6cf0: 0000000000000011 0000000000000000 ................
27e6d00: 0000000100000000 028c8ec000000000 ................
27e6d10: 028c8f5000000000 70c4b320028c8f90 ....P..........p
27e6d20: 800a3035b02fc458 00000000026cf448 X./.50..H.l.....
27e6d30: 00000000b02fbed0 00000000026cf448 ../.....H.l.....
core-parser> p 27e6d30 -b
Size: 0x10
Padding: 0x4
Object Name: a.b.c.d.e.f.g$h
[0x8] final a.b.c.d.e.f.g k = 0x26cf448
// extends java.lang.Object
[0x4] private transient int shadow$_monitor_ = 0
[0x0] private transient java.lang.Class shadow$_klass_ = 0xb02fbed0
Binary:
27e6d30: 00000000b02fbed0 00000000026cf448 ../.....H.l.....
core-parser> p 0x27e7100 -b
Size: 0x10
Object Name: a.b.c.d.u.s.t
[0x0c] private java.io.File toq = 0x0
[0x08] private final android.content.Context k = 0x26dd860
// extends java.lang.Object
[0x04] private transient int shadow$_monitor_ = 0
[0x00] private transient java.lang.Class shadow$_klass_ = 0xb0208330
Binary:
27e7100: 00000000b0208330 00000000026dd860 0.......`.m.....
a.b.c.d.e.f.g$h | 壞根大小 | a.b.c.d.u.s.t |
---|---|---|
0x27e6d30 | 0x27e6d40 | 0x27e7100 |
0x27e6d40 ~ 0x27e7100 中間存在至少一個壞根,總大小為 0x3c0。
core-parser> rd 0x27e6d40 -e 0x27e7100
27e6d40: 8adbc7e1b0000888 0000000000000000 ................
27e6d50: 0000000000000000 0000000000000000 ................
27e6d60: 0000000000000000 0000000000000000 ................
27e6d70: 0000000000000000 0000000000000000 ................
27e6d80: 0000000000000000 0000000000000000 ................
27e6d90: 028c86b000000000 0000000000000000 ................
27e6da0: 00000000026ccfd8 0000000000000000 ..l.............
27e6db0: 0000000071199090 028c86c800000000 ...q............
27e6dc0: 0000000000000000 0000000000000000 ................
27e6dd0: 028c8718028c86e8 028c8760028c8730 ........0...`...
27e6de0: 028c8798028c8778 0000000000000000 x...............
27e6df0: 0000000000000000 028c87e8028c87b0 ................
27e6e00: 00000000028c8860 028c887800000000 `...........x...
27e6e10: 0000000000000000 711a35f800000000 .............5.q
27e6e20: 026cefa000000000 0000000000000000 ......l.........
27e6e30: 0000000000000000 028c889000000000 ................
通過數據特徵分析,確定為某個Java對象的數據結構。這裡就定義為 A 類對象。
core-parser> class A -f
[0xb01f0888]
public final class A extends androidx.appcompat.widget.AppCompatImageView {
// Object instance fields:
[0x03b8] private boolean n
[0x03b4] private a.b.c.d.u.o q
[0x03b0] private volatile a.b.c.d.u.H k
// extends androidx.appcompat.widget.AppCompatImageView
[0x03ac] private final androidx.appcompat.widget.ld6 mImageHelper
[0x03a8] private final androidx.appcompat.widget.q mBackgroundTintHelper
[0x03a6] private boolean mHasLevel
...
}
該對象大小正好 0x3c0,滿足堆內存前後關係。
core-parser> wd 0x27e6d40 -v 8adbc7e1b01f0888
core-parser> p 0x27e6d40
Size: 0x3c0
Padding: 0x7
Object Name: A
[0x3b8] private boolean n = false
[0x3b4] private a.b.c.d.u.o q = 0x0
[0x3b0] private volatile a.b.c.d.u.H k = 0x0
// extends androidx.appcompat.widget.AppCompatImageView
[0x3ac] private final androidx.appcompat.widget.ld6 mImageHelper = 0x28c8a10
[0x3a8] private final androidx.appcompat.widget.q mBackgroundTintHelper = 0x28c89f0
[0x3a6] private boolean mHasLevel = false
...
[0x004] private transient int shadow$_monitor_ = -1965307935
[0x000] private transient java.lang.Class shadow$_klass_ = 0xb01f0888
core-parser> p 0x28c8a10
Size: 0x20
Padding: 0x4
Object Name: androidx.appcompat.widget.ld6
[0x18] private int n = 0
[0x14] private androidx.appcompat.widget.d3 zy = 0x0
[0x10] private androidx.appcompat.widget.d3 toq = 0x0
[0x0c] private androidx.appcompat.widget.d3 q = 0x0
[0x08] private final android.widget.ImageView k = 0x27e6d40
// extends java.lang.Object
[0x04] private transient int shadow$_monitor_ = 0
[0x00] private transient java.lang.Class shadow$_klass_ = 0xb0299708
core-parser> p 0x28c89f0
Size: 0x20
Object Name: androidx.appcompat.widget.q
[0x1c] private int zy = -1
[0x18] private final androidx.appcompat.widget.f7l8 toq = 0x2766330
[0x14] private androidx.appcompat.widget.d3 q = 0x0
[0x10] private androidx.appcompat.widget.d3 n = 0x0
[0x0c] private final android.view.View k = 0x27e6d40
[0x08] private androidx.appcompat.widget.d3 g = 0x0
// extends java.lang.Object
[0x04] private transient int shadow$_monitor_ = 0
[0x00] private transient java.lang.Class shadow$_klass_ = 0xb0299bf0
可見內存中的其它成員對象是正確有效的。也就是 klass_ 內容中 1f 被擦掉。
core-parser> rd 0x27e6d40 --ori
27e6d40: 8adbc7e1b0000888 ........
core-parser> rd 0x27e6d40
27e6d40: 8adbc7e1b01f0888 ........
0xb01f0888 + 0x1380 = 0xb01f1c08
core-parser> rd 0xb01f1c08
b01f1c08: 0000000071a76ea8 .n.q....
core-parser> method 0000000071a76ea8
public void android.widget.ImageView.jumpDrawablesToCurrentState() [dex_method_idx=51663]
地址 | 正確值 | 錯誤值 | 對象名 |
---|---|---|---|
27e6d40 | 8adbc7e1b01f0888 | 8adbc7e1b0000888 | A |
本地復現出現各類場景堆疊,基本都是 klass_ 壞根錯誤。統計錯誤特徵如下:
錯誤值 | 正確值 | 類名 |
---|---|---|
0x70004a00 | 0x70504a00 | java.lang.String |
0xaf005a30 | 0xafa55a30 | A |
0xaf004db0 | 0xafb54db0 | a.b.c.u.m$t |
0xaf0000a8 | 0xafb600a8 | a.b.c.d.e.b.M |
0xaf00d4d8 | 0xafb2d4d8 | a.a.t.b.i.T |
0x7000b170 | 0x708fb170 | android.view.View$ScrollabilityCache |
0x7000ff90 | 0x704eff90 | java.lang.Object[] |
0x70008970 | 0x70bf8970 | android.view.View$ListenerInfo |
0x70003418 | 0x704f3418 | java.util.HashMap |
main space 的類 | ||
0x030026f8 | 0x032326f8 | android.graphics.RenderNodeStubImpl |
由於沒有條件設置硬件觀察點,而且 Java 對象地址變化過大,直接觀察固定地址不可取,於是通過 core-parser 注入一個觀察線程,異步抓取離破壞時刻最近的堆疊。
// Monitor::CheckVirtualMemory((void *)0x2000000, 0x1000000);
/// Busy-polls a memory range for known-corrupted class-pointer values.
///
/// The range [vaddr, vaddr + size) is walked over and over in 8-byte steps.
/// The low 32 bits of every 64-bit word are compared against the corrupted
/// klass values collected from earlier crashes; every hit calls Report().
///
/// @param vaddr Start of the range to watch. Assumed to be mapped, readable
///              and 8-byte aligned for the whole scan -- TODO confirm at the
///              call site.
/// @param size  Length of the range in bytes. Only whole 8-byte words are
///              scanned; a trailing partial word is ignored so the scan never
///              reads past the end of the range.
///
/// Never returns while there is at least one whole word to scan. Returns
/// immediately when the range is shorter than one word.
void Monitor::CheckVirtualMemory(void *vaddr, uint64_t size) {
    // Round down to whole words: reading a partial trailing word would touch
    // bytes beyond vaddr + size.
    const uint64_t word_count = size / sizeof(uint64_t);
    if (word_count == 0) {
        // Nothing to watch; spinning forever on an empty range would be a
        // side-effect-free infinite loop.
        return;
    }

    // The watched memory is modified behind the compiler's back, so the read
    // itself must be volatile to force a fresh load on every pass.
    const volatile uint64_t *const begin =
        static_cast<const volatile uint64_t *>(vaddr);
    const volatile uint64_t *const end = begin + word_count;

    while (true) {
        for (const volatile uint64_t *current = begin; current < end; ++current) {
            // Only the low 32 bits of each word are compared.
            const uint32_t value = static_cast<uint32_t>(*current);
            if (value == 0x70004a00 || value == 0xaf005a30 ||
                value == 0xaf004db0 || value == 0xaf0000a8) {
                Report();
            }
        }
    }
}
/// Signals that a watched value was found by raising SIGSEGV.
/// The return value of raise() is intentionally ignored.
void Monitor::Report() {
    static_cast<void>(::raise(SIGSEGV));
}
觀察範圍設定在 0x2000000 ~ 0x3000000,這個範圍是根據前面本地復現時統計到的壞根對象地址確定的。
core-parser> space
TYPE REGION ADDRESS NAME
5 [0x2000000, 0x42000000) 0xb400007a6920aef0 main space (region space)...
"main" sysTid=28886 Waiting
| group="main" daemon=0 prio=5 target=0x0 uncaught_exception=0x0
| tid=1 sCount=0 flags=0 obj=0x72f744c0 self=0xb40000726060b380 env=0xb4000072f0615050
| stack=0x7fe9d3d000-0x7fe9d3f000 stackSize=0x7ff000 handle=0x70d0225098
| mutexes=0xb40000726060bb20 held=
x0 0xb40000735061ad80 x1 0x0000000000000080 x2 0x00000000000005c3 x3 0x0000000000000000
x4 0x0000000000000000 x5 0x0000000000000000 x6 0x0000000000000000 x7 0x0000000027d57d8b
x8 0x0000000073d3317c x9 0x0000000000000001 x10 0x0000000000000000 x11 0x0000000020000000
x12 0x0000000000000004 x13 0x000000000000005a x14 0x0000007ff20255a0 x15 0x000000007361499c
x16 0x0000007ff20234e0 x17 0x0000000070dc60f0 x18 0x00000070cf728000 x19 0xb400006ffd004010
x20 0x0000000000000000 x21 0xb400006ffd0040d0 x22 0x00000000021c07f0 x23 0x00000000022f7008
x24 0x0000000002b33b40 x25 0x0000007ff20255c0 x26 0x0000000018300004 x27 0x000000000000
---
原文出處:https://juejin.cn/post/7549928864300711963