记录自学PostgreSQL源码的过程, 非常感谢大神的分享 http://blog.itpub.net/6906/ ,本文大量借鉴了大神的博客内容,拜谢。本人属于初学者,理解能力、代码阅读能力都有限,如果有错误的地方请多多谅解,欢迎讨论。 源码如下
/*
 * Primary entry point for manual VACUUM and ANALYZE commands
 *
 * This is mainly a preparation wrapper for the real operations that will
 * happen in vacuum(): it converts the statement's option list into a
 * VacuumParams struct and then passes the statement's relation list on to
 * vacuum().
 *
 * pstate     - parse state; used here only to report the position of a
 *              rejected option
 * vacstmt    - the VACUUM/ANALYZE statement (option list and relation list)
 * isTopLevel - handed through unchanged to vacuum()
 *
 * Reports an ERROR for an unrecognized option name, and for a column list
 * given while ANALYZE is not in effect.
 */
void
ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
{
    VacuumParams params;
    bool        verbose = false;
    bool        skip_locked = false;
    bool        analyze = false;
    bool        freeze = false;
    bool        full = false;
    bool        disable_page_skipping = false;
    ListCell   *lc;

    /* Set default value */
    params.index_cleanup = VACOPT_TERNARY_DEFAULT;
    params.truncate = VACOPT_TERNARY_DEFAULT;

    /* Parse options list */
    foreach(lc, vacstmt->options)
    {
        DefElem    *opt = (DefElem *) lfirst(lc);

        /* Parse common options for VACUUM and ANALYZE */
        if (strcmp(opt->defname, "verbose") == 0)
            verbose = defGetBoolean(opt);
        else if (strcmp(opt->defname, "skip_locked") == 0)
            skip_locked = defGetBoolean(opt);
        else if (!vacstmt->is_vacuumcmd)
            /* ANALYZE accepts only the two options tested above */
            ereport(ERROR,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
                     parser_errposition(pstate, opt->location)));

        /* Parse options available on VACUUM */
        else if (strcmp(opt->defname, "analyze") == 0)
            analyze = defGetBoolean(opt);
        else if (strcmp(opt->defname, "freeze") == 0)
            freeze = defGetBoolean(opt);
        else if (strcmp(opt->defname, "full") == 0)
            full = defGetBoolean(opt);
        else if (strcmp(opt->defname, "disable_page_skipping") == 0)
            disable_page_skipping = defGetBoolean(opt);
        else if (strcmp(opt->defname, "index_cleanup") == 0)
            params.index_cleanup = get_vacopt_ternary_value(opt);
        else if (strcmp(opt->defname, "truncate") == 0)
            params.truncate = get_vacopt_ternary_value(opt);
        else
            ereport(ERROR,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
                     parser_errposition(pstate, opt->location)));
    }

    /*
     * Set vacuum options: fold the parsed booleans into one bitmask.  The
     * base bit is VACUUM or ANALYZE depending on which command was issued.
     */
    params.options =
        (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
        (verbose ? VACOPT_VERBOSE : 0) |
        (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
        (analyze ? VACOPT_ANALYZE : 0) |
        (freeze ? VACOPT_FREEZE : 0) |
        (full ? VACOPT_FULL : 0) |
        (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0);

    /* sanity checks on options */
    Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
    Assert((params.options & VACOPT_VACUUM) ||
           !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
    Assert(!(params.options & VACOPT_SKIPTOAST));

    /*
     * Make sure VACOPT_ANALYZE is specified if any column lists are present.
     */
    if (!(params.options & VACOPT_ANALYZE))
    {
        ListCell   *lc;

        foreach(lc, vacstmt->rels)
        {
            VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);

            if (vrel->va_cols != NIL)
                ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("ANALYZE option must be specified when a column list is provided")));
        }
    }

    /*
     * All freeze ages are zero if the FREEZE option is given; otherwise pass
     * them as -1 which means to use the default values.
     */
    if (params.options & VACOPT_FREEZE)
    {
        params.freeze_min_age = 0;
        params.freeze_table_age = 0;
        params.multixact_freeze_min_age = 0;
        params.multixact_freeze_table_age = 0;
    }
    else
    {
        params.freeze_min_age = -1;
        params.freeze_table_age = -1;
        params.multixact_freeze_min_age = -1;
        params.multixact_freeze_table_age = -1;
    }

    /* user-invoked vacuum is never "for wraparound" */
    params.is_wraparound = false;

    /* user-invoked vacuum never uses this parameter */
    params.log_min_duration = -1;

    /* Now go through the common routine */
    vacuum(vacstmt->rels, &params, NULL, isTopLevel);
}
创建测试用例
postgres=# create table t_index_test (id int,v1 char(8),v2 varchar(16));
CREATE TABLE
postgres=# alter table t_index_test add constraint pk_t_index_test primary key(id);
ALTER TABLE
postgres=# insert into t_index_test select generate_series(1,10), concat('v1-', generate_series(1,10)::varchar), concat('v2-', generate_series(1,10)::varchar);
INSERT 0 10
postgres=# select * from t_index_test;
id | v1 | v2
----+----------+-------
1 | v1-1 | v2-1
2 | v1-2 | v2-2
3 | v1-3 | v2-3
4 | v1-4 | v2-4
5 | v1-5 | v2-5
6 | v1-6 | v2-6
7 | v1-7 | v2-7
8 | v1-8 | v2-8
9 | v1-9 | v2-9
10 | v1-10 | v2-10
(10 rows)
postgres=# select pg_backend_pid();
pg_backend_pid
----------------
16227
(1 row)
postgres=# VACUUM (VERBOSE, ANALYZE) t_index_test;
gdb 断点,查看函数调用栈
(gdb) b ExecVacuum
Breakpoint 1 at 0x562e5d12cab0: file vacuum.c, line 89.
(gdb) c
Continuing.
Breakpoint 1, ExecVacuum (pstate=pstate@entry=0x562e5f32e1c0, vacstmt=vacstmt@entry=0x562e5f253970, isTopLevel=isTopLevel@entry=true) at vacuum.c:89
89 {
(gdb) bt
#0 ExecVacuum (pstate=pstate@entry=0x562e5f32e1c0, vacstmt=vacstmt@entry=0x562e5f253970, isTopLevel=isTopLevel@entry=true) at vacuum.c:89
#1 0x0000562e5d29408b in standard_ProcessUtility (pstmt=0x562e5f253c90, queryString=0x562e5f252d70 "VACUUM (VERBOSE, ANALYZE) t_index_test;", context=<optimized out>,
params=0x0, queryEnv=0x0, dest=0x562e5f253d70, completionTag=0x7ffc6ec73380 "") at utility.c:670
#2 0x0000562e5d290ec4 in PortalRunUtility (portal=0x562e5f2bacd0, pstmt=0x562e5f253c90, isTopLevel=<optimized out>, setHoldSnapshot=<optimized out>, dest=0x562e5f253d70,
completionTag=0x7ffc6ec73380 "") at pquery.c:1175
#3 0x0000562e5d291980 in PortalRunMulti (portal=portal@entry=0x562e5f2bacd0, isTopLevel=isTopLevel@entry=true, setHoldSnapshot=setHoldSnapshot@entry=false,
dest=dest@entry=0x562e5f253d70, altdest=altdest@entry=0x562e5f253d70, completionTag=completionTag@entry=0x7ffc6ec73380 "") at pquery.c:1321
#4 0x0000562e5d29255a in PortalRun (portal=portal@entry=0x562e5f2bacd0, count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=true, run_once=run_once@entry=true,
dest=dest@entry=0x562e5f253d70, altdest=altdest@entry=0x562e5f253d70, completionTag=0x7ffc6ec73380 "") at pquery.c:796
#5 0x0000562e5d28e685 in exec_simple_query (query_string=0x562e5f252d70 "VACUUM (VERBOSE, ANALYZE) t_index_test;") at postgres.c:1215
#6 0x0000562e5d28feab in PostgresMain (argc=<optimized out>, argv=argv@entry=0x562e5f27ee18, dbname=<optimized out>, username=<optimized out>) at postgres.c:4247
#7 0x0000562e5d218bdf in BackendRun (port=0x562e5f277310, port=0x562e5f277310) at postmaster.c:4437
#8 BackendStartup (port=0x562e5f277310) at postmaster.c:4128
#9 ServerLoop () at postmaster.c:1704
#10 0x0000562e5d219b30 in PostmasterMain (argc=3, argv=0x562e5f24ea40) at postmaster.c:1377
#11 0x0000562e5cf9add8 in main (argc=3, argv=0x562e5f24ea40) at main.c:228
调试观察
(gdb) n
101 params.truncate = VACOPT_TERNARY_DEFAULT;
(gdb) p *pstate
$1 = {parentParseState = 0x0, p_sourcetext = 0x562e5f252d70 "VACUUM (VERBOSE, ANALYZE) t_index_test;", p_rtable = 0x0, p_joinexprs = 0x0, p_joinlist = 0x0, p_namespace = 0x0,
p_lateral_active = false, p_ctenamespace = 0x0, p_future_ctes = 0x0, p_parent_cte = 0x0, p_target_relation = 0x0, p_target_rangetblentry = 0x0, p_is_insert = false,
p_windowdefs = 0x0, p_expr_kind = EXPR_KIND_NONE, p_next_resno = 1, p_multiassign_exprs = 0x0, p_locking_clause = 0x0, p_locked_from_parent = false, p_resolve_unknowns = true,
p_queryEnv = 0x0, p_hasAggs = false, p_hasWindowFuncs = false, p_hasTargetSRFs = false, p_hasSubLinks = false, p_hasModifyingCTE = false, p_last_srf = 0x0,
p_pre_columnref_hook = 0x0, p_post_columnref_hook = 0x0, p_paramref_hook = 0x0, p_coerce_param_hook = 0x0, p_ref_hook_state = 0x0}
(gdb) p *vacstmt
$2 = {type = T_VacuumStmt, options = 0x562e5f2537e0, rels = 0x562e5f253940, is_vacuumcmd = true}
(gdb) p vacstmt->options
$3 = (List *) 0x562e5f2537e0
(gdb) p vacstmt->options->next
There is no member named next.
结构体
/* ----------------------
 *		Vacuum and Analyze Statements
 *
 * Even though these are nominally two statements, it's convenient to use
 * just one node type for both.
 *
 * Because both commands share this single node type, the node tag does not
 * distinguish them; the is_vacuumcmd flag below does.
 * ----------------------
 */
typedef struct VacuumStmt
{
    NodeTag type;
    List *options; /* list of DefElem nodes */
    /* target relations; an empty list (NIL) means all relations */
    List *rels; /* list of VacuumRelation, or NIL for all */
    /* tells which of the two commands this node represents */
    bool is_vacuumcmd; /* true for VACUUM, false for ANALYZE */
} VacuumStmt;
其中options即为用户输入的option,在本例中为“VERBOSE”和“ANALYZE”。options是一个链表,这是PG中通用的一个链表结构体
/*
 * List - header node of a linked list of ListCell.
 */
typedef struct List
{
    NodeTag type; /* T_List, T_IntList, or T_OidList */
    int length; /* NOTE(review): presumably the number of cells -- confirm */
    ListCell *head; /* NOTE(review): presumably the first cell -- confirm */
    ListCell *tail; /* NOTE(review): presumably the last cell -- confirm */
} List;
/*
 * ListCell - one cell of a List.
 *
 * The payload is a union, so a cell stores exactly one of: a generic
 * pointer, an int, or an Oid.
 * NOTE(review): which member is valid presumably follows the owning List's
 * type tag (T_List / T_IntList / T_OidList) -- confirm.
 */
struct ListCell
{
    union
    {
        void *ptr_value; /* generic pointer payload */
        int int_value; /* integer payload */
        Oid oid_value; /* Oid payload */
    } data;
    ListCell *next; /* link to another cell; NOTE(review): presumably the following one -- confirm */
};
继续调试
(gdb) n
104 foreach(lc, vacstmt->options)
(gdb) n
106 DefElem *opt = (DefElem *) lfirst(lc);
(gdb) n
109 if (strcmp(opt->defname, "verbose") == 0)
(gdb) p *opt
$4 = {type = T_DefElem, defnamespace = 0x0, defname = 0x562e5f253758 "verbose", arg = 0x0, defaction = DEFELEM_UNSPEC, location = 8}
可以看到为第一个参数“verbose”,这里应该是前面的解析代码中做了转换,大写转换成小写。看一下foreach和lfirst宏
/*
 * foreach -
 *	  Loop over the cells of list "l": "cell" starts at list_head(l), is
 *	  advanced with lnext(), and the loop ends when "cell" becomes NULL.
 */
#define foreach(cell, l) \
    for ((cell) = list_head(l); (cell) != NULL; (cell) = lnext(cell))

/* lfirst - the pointer payload (data.ptr_value) stored in list cell "lc" */
#define lfirst(lc) ((lc)->data.ptr_value)
可以看到options这个链表中的节点指针所指向的位置是一个DefElem结构体。由于封装的原因,链表中保存的是一个void类型的指针,可以指向任意的类型,所以可以猜测在前面的调用函数中,在构建VacuumStmt结构体时应该已经把options链表中节点的实际数据设置成了DefElem(这里只是个人猜测,这一块代码值得深入了解一下,因为PG中出现大量这种类型的写法,有模拟C++类、实现封装的意思)。看一下DefElem结构体
/*
 * DefElem - a generic "name = value" option definition
 *
 * That is, each option carries a name together with an associated value
 * (the name and the value are two separate things).
 *
 * In some contexts the name can be qualified.  Also, certain SQL commands
 * allow a SET/ADD/DROP action to be attached to option settings, so it's
 * convenient to carry a field for that too.  (Note: currently, it is our
 * practice that the grammar allows namespace and action only in statements
 * where they are relevant; C code can just ignore those fields in other
 * statements.)
 *
 * DefElemAction enumerates those actions.
 */
typedef enum DefElemAction
{
    DEFELEM_UNSPEC = 0,         /* no action given */
    DEFELEM_SET = 1,            /* SET action */
    DEFELEM_ADD = 2,            /* ADD action */
    DEFELEM_DROP = 3            /* DROP action */
} DefElemAction;
/*
 * DefElem - one option definition: a name, an optional namespace
 * qualifier, an argument node, and an optional SET/ADD/DROP action.
 */
typedef struct DefElem
{
    NodeTag type;
    char *defnamespace; /* NULL if unqualified name */
    char *defname; /* the option's name */
    Node *arg; /* a (Value *) or a (TypeName *) */
    DefElemAction defaction; /* unspecified action, or SET/ADD/DROP */
    int location; /* token location, or -1 if unknown */
} DefElem;
继续调试
(gdb) n
110 verbose = defGetBoolean(opt);
(gdb) n
106 DefElem *opt = (DefElem *) lfirst(lc);
(gdb) p verbose
$5 = true
(gdb) n
109 if (strcmp(opt->defname, "verbose") == 0)
(gdb) p *opt
$6 = {type = T_DefElem, defnamespace = 0x0, defname = 0x562e5d566c01 "analyze", arg = 0x0, defaction = DEFELEM_UNSPEC, location = 17}
(gdb) n
111 else if (strcmp(opt->defname, "skip_locked") == 0)
(gdb) n
113 else if (!vacstmt->is_vacuumcmd)
(gdb) n
120 else if (strcmp(opt->defname, "analyze") == 0)
(gdb) n
121 analyze = defGetBoolean(opt);
(gdb) n
下个参数和之前是一样的逻辑,略过,继续调试
(gdb) n
140 params.options =
(gdb) p analyze
$7 = <optimized out>
(gdb) n
158 if (!(params.options & VACOPT_ANALYZE))
(gdb) p analyze
$8 = <optimized out>
(gdb) p params
$9 = {options = 7, freeze_min_age = 0, freeze_table_age = 1598457328, multixact_freeze_min_age = 22062, multixact_freeze_table_age = 1598458104, is_wraparound = 46,
log_min_duration = 208, index_cleanup = VACOPT_TERNARY_DEFAULT, truncate = VACOPT_TERNARY_DEFAULT}
(gdb) n
177 if (params.options & VACOPT_FREEZE)
(gdb) n
189 params.multixact_freeze_table_age = -1;
(gdb) n
193 params.is_wraparound = false;
(gdb) n
196 params.log_min_duration = -1;
(gdb) n
199 vacuum(vacstmt->rels, &params, NULL, isTopLevel);
(gdb) p params
$10 = {options = 7, freeze_min_age = -1, freeze_table_age = -1, multixact_freeze_min_age = -1, multixact_freeze_table_age = -1, is_wraparound = false, log_min_duration = -1,
index_cleanup = VACOPT_TERNARY_DEFAULT, truncate = VACOPT_TERNARY_DEFAULT}
(gdb)
后面就是在设置结构体VacuumParams的值了
/*
 * Option flags for VACUUM/ANALYZE.  Every member occupies its own bit, so
 * members can be OR'ed together into a single bitmask.
 */
typedef enum VacuumOption
{
    VACOPT_VACUUM = (1 << 0),                   /* perform VACUUM */
    VACOPT_ANALYZE = (1 << 1),                  /* perform ANALYZE */
    VACOPT_VERBOSE = (1 << 2),                  /* emit progress information */
    VACOPT_FREEZE = (1 << 3),                   /* the FREEZE option */
    VACOPT_FULL = (1 << 4),                     /* FULL (non-concurrent) vacuum */
    VACOPT_SKIP_LOCKED = (1 << 5),              /* skip when the lock cannot be
                                                 * obtained */
    VACOPT_SKIPTOAST = (1 << 6),                /* leave the TOAST table, if any,
                                                 * unprocessed */
    VACOPT_DISABLE_PAGE_SKIPPING = (1 << 7)     /* never skip pages */
} VacuumOption;
/*
 * A ternary value used by vacuum parameters.
 *
 * DEFAULT means the effective value is to be determined from other
 * configuration, e.g. reloptions.
 */
typedef enum VacOptTernaryValue
{
    VACOPT_TERNARY_DEFAULT = 0,     /* decide from other configuration */
    VACOPT_TERNARY_DISABLED = 1,    /* explicitly off */
    VACOPT_TERNARY_ENABLED = 2      /* explicitly on */
} VacOptTernaryValue;
/*
 * Parameters customizing behavior of VACUUM and ANALYZE.
 *
 * Note that at least one of VACOPT_VACUUM and VACOPT_ANALYZE must be set
 * in options.
 */
typedef struct VacuumParams
{
    int options; /* bitmask of VacuumOption */
    int freeze_min_age; /* min freeze age, -1 to use default */
    int freeze_table_age; /* age at which to scan whole table */
    int multixact_freeze_min_age; /* min multixact freeze age, -1 to
                                   * use default */
    int multixact_freeze_table_age; /* multixact age at which to scan
                                     * whole table */
    bool is_wraparound; /* force a for-wraparound vacuum */
    int log_min_duration; /* minimum execution threshold in ms at
                           * which verbose logs are activated, -1
                           * to use default */
    VacOptTernaryValue index_cleanup; /* Do index vacuum and cleanup,
                                       * default value depends on reloptions */
    VacOptTernaryValue truncate; /* Truncate empty pages at the end,
                                  * default value depends on reloptions */
} VacuumParams;
这里用了一个局部变量params作为参数,调用vacuum主实现函数。在本例中没有进入
/*
 * Excerpt from ExecVacuum: when ANALYZE is not among the options, walk the
 * target relations and raise an error if any of them carries a column list.
 */
if (!(params.options & VACOPT_ANALYZE))
{
    ListCell *lc;
    foreach(lc, vacstmt->rels)
    {
        VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
        /* a non-NIL va_cols means a column list was given for this relation */
        if (vrel->va_cols != NIL)
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("ANALYZE option must be specified when a column list is provided")));
    }
}
所以没有观察到vacstmt->rels这个链表的结构,因此,再调试一次,去掉ANALYZE选项。结构体
/*
 * Info about a single target table of VACUUM/ANALYZE.
 *
 * If the OID field is set, it always identifies the table to process.
 * Then the relation field can be NULL; if it isn't, it's used only to report
 * failure to open/lock the relation.
 */
typedef struct VacuumRelation
{
    NodeTag type;
    RangeVar *relation; /* table name to process, or NULL */
    Oid oid; /* table's OID; InvalidOid if not looked up */
    /* column names given for this table; NIL means all columns */
    List *va_cols; /* list of column names, or NIL for all */
} VacuumRelation;
然后根据gdb结果
(gdb) p *vrel
$6 = {type = T_VacuumRelation, relation = 0x562e5f253758, oid = 0, va_cols = 0x0}
oid字段为0,即InvalidOid。这与注释“InvalidOid if not looked up”是一致的:此时还没有根据表名去查找表的OID,所以并不是注释有误。 看一下RangeVar结构体
/*
 * RangeVar - range variable, used in FROM clauses
 *
 * Also used to represent table names in utility statements; there, the alias
 * field is not used, and inh tells whether to apply the operation
 * recursively to child tables.  In some contexts it is also useful to carry
 * a TEMP table indication here.
 */
typedef struct RangeVar
{
    NodeTag type;
    char *catalogname; /* the catalog (database) name, or NULL */
    char *schemaname; /* the schema name, or NULL */
    char *relname; /* the relation/sequence name */
    bool inh; /* expand rel by inheritance? recursively act
               * on children? */
    char relpersistence; /* see RELPERSISTENCE_* in pg_class.h */
    Alias *alias; /* table alias & optional column aliases */
    int location; /* token location, or -1 if unknown */
} RangeVar;
(gdb) p *(RangeVar *)(vrel->relation)
$10 = {type = T_RangeVar, catalogname = 0x0, schemaname = 0x0, relname = 0x562e5f253738 "t_index_test", inh = true, relpersistence = 112 'p', alias = 0x0, location = 15}
(gdb)
基本调试完成。总结如下:结构体VacuumStmt是vacuum的主结构体,其中的options链表保存用户输入的选项,该链表中的节点类型是DefElem结构体;rels链表是用户想要进行vacuum的表的集合,该链表的节点类型为VacuumRelation结构体。而VacuumParams结构体中保存的是进行vacuum操作时的一些配置参数,其中的options和上面的options不一样:这里的options是通过解析上面的options链表,再借助VacuumOption这个枚举生成的一个位掩码,用来代表上面的options(也就是用户输入的vacuum选项)。 再次感谢 http://blog.itpub.net/6906/ ,大神这种分享知识的精神也正符合PG开源社区的宗旨。
