[20250205]21c library cache mutex的深入探究10(_mutex_wait_scheme=1).txt

来源:这里教程网 时间:2026-03-03 21:33:14 作者:

[20250205]21c library cache mutex的深入探究10(_mutex_wait_scheme=1).txt --//探究library cache mutex阻塞导致的相关等待事件,分析_mutex_wait_scheme=1的情况的相关细节。 1.环境: SCOTT@book01p> @ ver2 ============================== PORT_STRING                   : x86_64/Linux 2.4.xx VERSION                       : 21.0.0.0.0 BANNER                        : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production BANNER_FULL                   : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production Version 21.3.0.0.0 BANNER_LEGACY                 : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production CON_ID                        : 0 PL/SQL procedure successfully completed. SYS@book> @ hidez ^_mutex  NUM N_HEX NAME               DESCRIPTION       DEFAULT_VALUE SESSION_VALUE SYSTEM_VALUE ISSES ISSYS_MOD ---- ----- ------------------ ----------------- ------------- ------------- ------------ ----- --------- 3553   DE1 _mutex_wait_time   Mutex wait time   TRUE          1             1            FALSE IMMEDIATE 3554   DE2 _mutex_spin_count  Mutex spin count  TRUE          255           255          FALSE IMMEDIATE 3555   DE3 _mutex_wait_scheme Mutex wait scheme TRUE          2             2            FALSE IMMEDIATE --//缺省_mutex_wait_time=1,时间单位与_mutex_wait_scheme相关,_mutex_wait_scheme=2时时间单位是厘秒,而 --//_mutex_wait_scheme=0,1时,单位时毫秒。 --//_mutex_wait_scheme =2时,_mutex_wait_time>1时sleeps的时间会出现指数回退. --//缺省_mutex_wait_scheme =2. --//网上找了一段资料: * _mutex_spin_count (Integer) - This sets the number of times to spin before yielding/waiting. * _mutex_wait_scheme (Integer) - In 11.2 this controls which wait scheme to use. It can be set to one of the three wait schemes described above thus: _mutex_wait_scheme = 0                        – Always YIELD _mutex_wait_scheme = 1 & _mutex_wait_time = t – Always SLEEP for t milli-seconds _mutex_wait_scheme = 2 & _mutex_wait_time = t – EXP BACKOFF with maximum sleep (default) 2.测试前准备: SYS@book> oradebug setmypid Statement processed. SYS@book> oradebug dump library_cache 4 Statement processed. $ grep "^Bucket:" /u01/app/oracle/diag/rdbms/book/book/trace/book_ora_10770.trc | head -4 Bucket: #=0 Mutex=0x6cfa1400(1125281431552, 9, 0, 6) Bucket: #=5 Mutex=0x6cfa14f0(1125281431552, 4, 0, 6) Bucket: #=17 Mutex=0x6cfa1730(1125281431552, 8, 0, 6) Bucket: #=39 Mutex=0x6cfa1b50(1125281431552, 4, 0, 6) --//以上是前面测试的结果,直接找Bucket: #=0 Mutex=0x6cfa1400的语句测试。 --//该sql语句select /*+ 9 */ count(*) from dept where deptno = 93834;的bucket=0. --//Bucket: #=0 Mutex=0x6cfa1400 --//0x6cfa1400-0x10 = 0x6cfa13f0 --//昨天测试了_mutex_wait_scheme=2的情况,当_mutex_wait_time>1时才会出现EXP BACKOFF with maximum sleep。 --//上午测试_mutex_wait_scheme=0的情况,下午测试_mutex_wait_scheme=1的情况. --//session 2: SYS@book> alter system set "_mutex_wait_scheme"=1 scope=memory; System altered. 2.测试: --//session 1: SCOTT@book01p> @ spid ============================== SID                           : 148 SERIAL#                       : 21638 PROCESS                       : 3538 SERVER                        : DEDICATED SPID                          : 3540 PID                           : 49 P_SERIAL#                     : 4 KILL_COMMAND                  : alter system kill session '148,21638' immediate; PL/SQL procedure successfully completed. SCOTT@book01p> alter session  set session_cached_cursors=0 ; Session altered. --//主要目的避免光标缓存,保证每次执行library cache mutex gets增加。 SCOTT@book01p> select /*+ 9 */ count(*) from dept where deptno = 93834;   COUNT(*) ----------          0 --//执行多次。 SCOTT@book01p> @ hashz HASH_VALUE SQL_ID        CHILD_NUMBER KGL_BUCKET HASH_HEX   SQL_EXEC_START      SQL_EXEC_ID ---------- ------------- ------------ ---------- ---------- ------------------- ----------- 3403546624 as3g00v5dw000            0          0  cade0000  2025-02-05 15:32:19    16777219 --//session 2: SYS@book> @opeek 0x6cfa13f0 48 0 [06CFA13F0, 06CFA1420) = 64283498 00000000 64283498 00000000 00000000 00000000 00000007 00000000 00000000 00000000 67B3EFC0 00000000 --//gets=0x07 SYS@book> @ sharepool/shp4z as3g00v5dw000 -1 SYS@book> @ pr ============================== HANDLE_TYPE                   : parent handle address KGLHDADR                      : 0000000064283498 KGLHDPAR                      : 0000000064283498 C40                           : select /*+ 9 */ count(*) from dept where KGLHDLMD                      : 0 KGLHDPMD                      : 0 KGLHDIVC                      : 0 KGLOBHD0                      : 0000000067B06080 KGLOBHD6                      : 00 KGLOBHS0                      : 4064 KGLOBHS6                      : 0 KGLOBT16                      : 0 N0_6_16                       : 4064 N20                           : 4064 KGLNAHSH                      : 3403546624 KGLOBT03                      : as3g00v5dw000 KGLOBT09                      : 65535 PL/SQL procedure successfully completed. --//父游标句柄地址0000000064283498已经写入mutex地址-0x10处。 SYS@book> @opeek 0x6cfa13f0 48 0 [06CFA13F0, 06CFA1420) = 64283498 00000000 64283498 00000000 00000000 00000000 0000000B 00000000 00000000 00000000 67B3EFC0 00000000 --//gets=0xb,似乎执行shp4z脚本也会导致gets数量增加。 4.继续测试: --//由于目前版本21c不支持oradebug poke内存信息,采用gdb操作。 --//session 3: SYS@book> @ spid ============================== SID                           : 407 SERIAL#                       : 16644 PROCESS                       : 3833 SERVER                        : DEDICATED SPID                          : 3834 PID                           : 99 P_SERIAL#                     : 2 KILL_COMMAND                  : alter system kill session '407,16644' immediate; PL/SQL procedure successfully completed. --//window 1: $ strace -Ttt -y -f -p 3828 2>&1 | tee  0205mutex.txt --//window 2: $rlgdb -f -p 3834 ... (gdb) x /8wx 0x6cfa1400 0x6cfa1400:     0x00000000      0x00000000      0x0000000c      0x00000000 0x6cfa1410:     0x00000000      0x00000000      0x67b3efc0      0x00000000 (gdb) set *(int *)0x6cfa1404=0x8888 (gdb) x /8wx 0x6cfa1400 0x6cfa1400:     0x00000000      0x00008888      0x0000000c      0x00000000 0x6cfa1410:     0x00000000      0x00000000      0x67b3efc0      0x00000000 --//gets数量0x0c --//session 1: SCOTT@book01p> select /*+ 9 */ count(*) from dept where deptno = 93834; --//挂起!!等3X秒以上。 --//window 2: (gdb) set *(int *)0x6cfa1404=0x0 (gdb) x /8wx 0x6cfa1400 0x6cfa1400:     0x00000000      0x00000000      0x0000000d      0x00002cf8 0x6cfa1410:     0x00000000      0x00000000      0x67b3efc0      0x00000000 --//gets增加到0xd,增加1次,sleeps增加到0x00002cf8 = 11512 --//session 1: SCOTT@book01p> select /*+ 9 */ count(*) from dept where deptno = 93834;   COUNT(*) ----------          0 --//执行完成。 5.分析: --//session 2: SYS@book> @ ashtop event,sid,p1,p1raw,p2raw,p3raw 1=1 "'2025-02-05 15:37:50'" "'2025-02-05 15:38:34'"     Total                                                                                                                                                              Distinct Distinct    Distinct   Seconds     AAS %This   EVENT                         SID         P1 P1RAW             P2RAW             P3RAW             FIRST_SEEN          LAST_SEEN           Execs Seen  Tstamps Execs Seen1 --------- ------- ------- ----------------------------- --- ---------- ----------------- ----------------- ----------------- ------------------- ------------------- ---------- -------- -----------        43     1.0   70% | library cache: bucket mutex X 283          0 0000000000000000  0000888800000000  000000000000003E  2025-02-05 15:37:51 2025-02-05 15:38:33          1       43          43         9      .2   15% |                                 5          3                                                       2025-02-05 15:37:51 2025-02-05 15:38:33          1        9           9         5      .1    8% |                               259        100                                                       2025-02-05 15:38:06 2025-02-05 15:38:30          1        5           5         2      .0    3% | LGWR all worker groups        258          0 0000000000000000  0000000000000000  0000000000000000  2025-02-05 15:37:52 2025-02-05 15:38:01          1        2           2         1      .0    2% |                                 1        100                                                       2025-02-05 15:37:59 2025-02-05 15:37:59          1        1           1         1      .0    2% |                               136          0                                                       2025-02-05 15:37:52 2025-02-05 15:37:52          1        1           1 6 rows selected. --//window 2: --//按ctrl+c中断strace跟踪。 $ egrep "getrusage" 0205mutex.txt 15:37:50.871140 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 287649}, ru_stime={0, 38280}, ...}) = 0 <0.000018> 15:37:52.533279 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 300599}, ru_stime={0, 42463}, ...}) = 0 <0.000014> 15:37:54.534693 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 312143}, ru_stime={0, 48198}, ...}) = 0 <0.000062> 15:37:56.539985 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 329404}, ru_stime={0, 58908}, ...}) = 0 <0.000018> 15:37:58.542490 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 332266}, ru_stime={0, 70013}, ...}) = 0 <0.000029> 15:38:00.546928 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 352535}, ru_stime={0, 83527}, ...}) = 0 <0.000045> 15:38:02.550225 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 371273}, ru_stime={0, 96018}, ...}) = 0 <0.000011> 15:38:04.558715 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 377830}, ru_stime={0, 109984}, ...}) = 0 <0.000014> 15:38:06.559932 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 397581}, ru_stime={0, 136085}, ...}) = 0 <0.000013> 15:38:08.570251 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 409084}, ru_stime={0, 152916}, ...}) = 0 <0.000028> 15:38:10.572618 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 427753}, ru_stime={0, 167381}, ...}) = 0 <0.000028> 15:38:12.586048 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 441045}, ru_stime={0, 193792}, ...}) = 0 <0.000081> 15:38:14.604125 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 454608}, ru_stime={0, 212421}, ...}) = 0 <0.000042> 15:38:16.604363 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 464452}, ru_stime={0, 231549}, ...}) = 0 <0.000028> 15:38:18.626482 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 479808}, ru_stime={0, 252242}, ...}) = 0 <0.000013> 15:38:20.634835 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 490703}, ru_stime={0, 263907}, ...}) = 0 <0.000013> 15:38:22.639064 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 497624}, ru_stime={0, 281622}, ...}) = 0 <0.000018> 15:38:24.640630 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 507448}, ru_stime={0, 292334}, ...}) = 0 <0.000013> 15:38:26.656429 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 511999}, ru_stime={0, 311053}, ...}) = 0 <0.000091> 15:38:28.656760 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 528800}, ru_stime={0, 332948}, ...}) = 0 <0.000015> 15:38:30.659396 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 539324}, ru_stime={0, 341291}, ...}) = 0 <0.000014> 15:38:32.660070 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554088}, ru_stime={0, 358865}, ...}) = 0 <0.000013> 15:38:33.698162 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554088}, ru_stime={0, 379381}, ...}) = 0 <0.000010> 15:38:33.699511 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554249}, ru_stime={0, 379524}, ...}) = 0 <0.000008> --//调用getrusage是2秒间隔. $ awk '{print $2}' 0205mutex.txt |egrep "semtimedop|getrusage|sched_yield|select" | uniq -c | head -10       1 getrusage(0x1       1 sched_yield()     339 select(0,       1 getrusage(0x1     329 select(0,       1 getrusage(0x1     614 select(0,       1 getrusage(0x1     317 select(0,       1 getrusage(0x1 --//开始1次sched_yield()调用,其后调用select许多次,2秒之后调用getrusage,如此反复循环。 $ grep select 0205mutex.txt | head 15:37:50.871613 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001440> 15:37:50.873155 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001795> 15:37:50.875050 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001956> 15:37:50.877092 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001829> 15:37:50.879008 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001840> 15:37:50.880933 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001878> 15:37:50.882895 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001873> 15:37:50.884854 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001943> 15:37:50.886881 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001821> 15:37:50.888788 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001927> --//休眠的时间单位微秒(1/10^6秒),1000微秒= 1000/10^6  = .001秒。select休眠的时间很短1毫秒。 --//而_mutex_wait_scheme=0是99次sched_yield,1次select(0.001秒),遇到getrusage分开,相加次数还是99次,如此反复循环。 --//而_mutex_wait_scheme=2,使用semtimedop调用(0.01秒)。 SYS@book> @ mutexprofz idn,loc "ts>=trunc(sysdate)+15/24+37/1440+50/86400" -- MutexProf by Tanel Poder (http://www.tanelpoder.com) -- Showing profile of top 20 sleeps... -- column info : id idn hash hash_value=>hash_value ts=>sleep_timestamp --               req=>requesting_session blk=>blocking_session val=>mutex_value maddr=>mutex_addr SUM_SLEEPS      GETS_DIFF MUTEX_TYPE             IDN GET_LOCATION                      OBJECT_NAME ---------- -------------- --------------- ---------- --------------------------------- --------------------------------------------------------------------------------      11512                Library Cache            0 kglhdgn1  62                      (name not found)          2                Library Cache   1068676711 kglpin1   4                       SELECT /*+ NO_STATEMENT_QUEUING RESULT_CACHE (SYSOBJ=FALSE) */                                                                                        "TS#","NAME","ONLINE$","CONTENTS$","BLOCKSIZE","BITMAPPED","FLAGS" FROM                                                                                        "SYS"."TS$" "K" WHERE 1=1 --//0x00002cf8 = 11512 --//记录的sleeps次数也是11512次。GET_LOCATION 对应的数字62 与前面ashtop 看到的p2 0x3e对应,注意id=0表示bucket桶号。 $ grep "select" 0205mutex.txt |wc   11511  126621  851814 $ grep "sched_yield" 0205mutex.txt |wc       1       5      55 --//11511+1=11512=0x2cf8完全能对应上。 SYS@book> @opeek 0x6cfa13f0 48 0 [06CFA13F0, 06CFA1420) = 64283498 00000000 64283498 00000000 00000000 00000000 00000016 00002CF8 00000000 00000000 67B3EFC0 00000000 --//与opeek执行看到的一致。 6.小结: --//缺省_mutex_wait_time=1,_mutex_wait_scheme=1的情况下select的时间是1毫秒。 --//_mutex_wait_scheme=1模式下开始1次sched_yield()调用,其后调用select许多次,2秒之后调用getrusage,如此反复循环。 --//_mutex_wait_scheme=1模式调用,调用select仅仅0.001秒。 --//测试mutex_wait_scheme=1,修改_mutex_wait_time的情况。按照前面介绍应该是select每次_mutex_wait_time,单位毫秒。 SYS@book> alter system set "_mutex_wait_time"=20 scope=memory; System altered. --//重复测试: $ egrep "getrusage" 0205mutexa.txt | head -20 16:05:27.368592 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554249}, ru_stime={0, 381378}, ...}) = 0 <0.000009> 16:05:28.756046 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554249}, ru_stime={0, 382136}, ...}) = 0 <0.000027> 16:05:30.777212 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 555697}, ru_stime={0, 386260}, ...}) = 0 <0.000046> 16:05:32.809704 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 556296}, ru_stime={0, 389981}, ...}) = 0 <0.000029> 16:05:34.808683 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 558242}, ru_stime={0, 390338}, ...}) = 0 <0.000157> 16:05:36.813974 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 561839}, ru_stime={0, 393287}, ...}) = 0 <0.000030> 16:05:38.817664 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 563650}, ru_stime={0, 397445}, ...}) = 0 <0.000038> 16:05:40.821995 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 564663}, ru_stime={0, 401474}, ...}) = 0 <0.000145> 16:05:42.858149 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 569048}, ru_stime={0, 403980}, ...}) = 0 <0.000033> 16:05:44.856437 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 571326}, ru_stime={0, 405598}, ...}) = 0 <0.000080> 16:05:46.873433 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 573060}, ru_stime={0, 407250}, ...}) = 0 <0.000017> 16:05:48.876284 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 576291}, ru_stime={0, 410389}, ...}) = 0 <0.000018> 16:05:50.910784 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 578221}, ru_stime={0, 410726}, ...}) = 0 <0.000015> 16:05:52.890046 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 578221}, ru_stime={0, 413457}, ...}) = 0 <0.000029> 16:05:54.904141 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 581566}, ru_stime={0, 413947}, ...}) = 0 <0.000028> 16:05:56.913854 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 584040}, ru_stime={0, 415708}, ...}) = 0 <0.000046> 16:05:58.895999 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 585411}, ru_stime={0, 418569}, ...}) = 0 <0.000008> 16:05:58.897166 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 585536}, ru_stime={0, 418658}, ...}) = 0 <0.000009> --//间隔还是2秒。 --//除非设置"_mutex_wait_time">2000,估计没人这么做。 $ awk '{print $2}' 0205mutexa.txt |egrep "semtimedop|getrusage|sched_yield|select" | uniq -c | head -10       1 getrusage(0x1       1 sched_yield()      35 select(0,       1 getrusage(0x1      76 select(0,       1 getrusage(0x1      62 select(0,       1 getrusage(0x1      42 select(0,       1 getrusage(0x1  $ grep "select" 0205mutexa.txt |head -4 16:05:27.368878 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.028785> 16:05:27.397848 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.031134> 16:05:27.429072 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.021033> 16:05:27.450178 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.040958> --//休眠的时间单位微秒(1/10^6秒),20000微秒= 20000/10^6  = 0.020秒。select休眠的时间20毫秒。 --//符合前面介绍的_mutex_wait_scheme = 1 & _mutex_wait_time = t – Always SLEEP for t milli-seconds 一致。

相关推荐