# 跟踪 innodb_flush_method 代码

innodb_flush_method 是只读参数,只能在 InnoDB 启动时设置。
srv0start.cc:innobase_start_or_create_for_mysql(void) 在初始化时根据该参数设置 srv_unix_file_flush_method。
os0file.cc:os_file_create_func() 控制打开文件的方式,对数据文件和日志文件分别决定是否使用操作系统缓存。
log0log.cc

- log_io_complete()
- log_checkpoint()
- log_write_up_to()
- log_write_flush_to_disk_low()

以上函数控制日志落盘的方式。

代码(节选,`......` 表示省略的部分):

// Main thread.
// Excerpt from InnoDB startup: translates the string held in
// srv_file_flush_method_str into the srv_unix_file_flush_method setting.
// NOTE(review): this is an elided excerpt. The `if` that opens the else-if
// chain below is not shown; presumably it covers the case where no flush
// method was configured, which defaults to SRV_UNIX_FSYNC -- TODO confirm
// against srv0start.cc.
dberr_t
innobase_start_or_create_for_mysql(void)
......
#ifndef _WIN32
srv_unix_file_flush_method = SRV_UNIX_FSYNC;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
srv_unix_file_flush_method = SRV_UNIX_FSYNC;

} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;

} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;

} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;

} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;

} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
srv_unix_file_flush_method = SRV_UNIX_NOSYNC;

...
#endif
}

// Opens (or creates) a file.
// Excerpt showing where the flush method influences how the file is opened:
//   - log files get O_SYNC added to the open flags when the method is O_DSYNC;
//   - files that are neither log files nor temporary data files get
//     os_file_set_nocache() applied when the method is O_DIRECT or
//     O_DIRECT_NO_FSYNC.
// Parameters as used in the visible code:
//   name        path passed to ::open() and to the error handlers
//   create_mode compared with OS_FILE_CREATE to label a failure as
//               "create" or "open"
//   purpose     not referenced in the visible excerpt
//   type        compared with OS_LOG_FILE / OS_DATA_TEMP_FILE
//   read_only   when true, neither O_SYNC nor the no-cache step is applied
//   success     out: set to true if ::open() succeeded, false otherwise
// NOTE(review): create_flag, file, mode_str, on_error_no_exit and
// on_error_silent are set up in the elided parts; the return statement is
// also elided.
pfs_os_file_t
os_file_create_func(
const char* name,
ulint create_mode,
ulint purpose,
ulint type,
bool read_only,
bool* success)
{

.....
#ifdef O_SYNC
/* We let O_SYNC affect only log files. O_DSYNC is mapped to O_SYNC because
the datasync option could corrupt files back in 2001. */

if (!read_only
&& type == OS_LOG_FILE
&& srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
create_flag |= O_SYNC;
}
#endif /* O_SYNC */

/* Keep calling ::open() for as long as the error handler asks for a retry. */
bool retry;
do {
file.m_file = ::open(name, create_flag, os_innodb_umask);

if (file.m_file == -1) {
const char* operation;

/* Operation name handed to the error handler. */
operation = (create_mode == OS_FILE_CREATE
&& !read_only) ? "create" : "open";

*success = false;

if (on_error_no_exit) {
retry = os_file_handle_error_no_exit(
name, operation, on_error_silent);
} else {
retry = os_file_handle_error(name, operation);
}
} else {
*success = true;
retry = false;
}

} while (retry);

/* For O_DIRECT / O_DIRECT_NO_FSYNC, disable OS caching only on files opened
for writing that are neither log files nor temporary data files. */
if (!read_only
&& *success
&& (type != OS_LOG_FILE && type != OS_DATA_TEMP_FILE)
&& (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT
|| srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)) {

os_file_set_nocache(file.m_file, name, mode_str);
}
.......

}




//io thread
//Completes an I/O on a log file.
//Excerpt: after a checkpoint write, decides from srv_unix_file_flush_method
//whether fil_flush() must be called on the log space, then calls
//log_io_complete_checkpoint().
//NOTE(review): the braces are unbalanced because the enclosing `if` was
//elided at the dotted line; presumably it selects the checkpoint-write case
//-- TODO confirm against log0log.cc.
void
log_io_complete(
/*============*/
log_group_t* group) /*!< in: log group or a dummy pointer */
{

.......
#ifdef _WIN32
fil_flush(group->space_id);
#else
switch (srv_unix_file_flush_method) {
case SRV_UNIX_O_DSYNC:
case SRV_UNIX_NOSYNC:
break;//no fil_flush(group->space_id) for these two methods; per the author, in O_DSYNC mode redo log writes are not flushed from the OS cache
case SRV_UNIX_FSYNC:
case SRV_UNIX_LITTLESYNC:
case SRV_UNIX_O_DIRECT:
case SRV_UNIX_O_DIRECT_NO_FSYNC:
fil_flush(group->space_id);//flush the redo log from the OS cache to disk
}
#endif /* _WIN32 */

DBUG_PRINT("ib_log", ("checkpoint info written to group %u",
unsigned(group->id)));
log_io_complete_checkpoint();

return;
}

ut_error; /*!< We currently use synchronous writing of the
logs and cannot end up here! */
}


//srv_master thread
/** Makes a checkpoint. This function does not flush dirty pages from the
buffer pool: it only checks the oldest lsn in the buffer pool and writes
information about that lsn to the log files. Use log_make_checkpoint_at()
to also flush the buffer pool.
NOTE(review): the parameters sync and write_always are not referenced in the
visible excerpt, and the return statement is elided. */

bool
log_checkpoint(
bool sync,
bool write_always)
{
.....

#ifndef _WIN32
/* Every flush method except NOSYNC flushes the tablespace files here. */
switch (srv_unix_file_flush_method) {
case SRV_UNIX_NOSYNC:
break;
case SRV_UNIX_O_DSYNC:
case SRV_UNIX_FSYNC:
case SRV_UNIX_LITTLESYNC:
case SRV_UNIX_O_DIRECT:
case SRV_UNIX_O_DIRECT_NO_FSYNC:
/** (Description of fil_flush_file_spaces, quoted by the author.)
Flush to disk the writes in file spaces of the given type
possibly cached by the OS.
@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
}
#endif /* !_WIN32 */
.....
}


//buf_flush_page_cleaner_coordinator thread
/**
Ensure that the log has been written to the log file up to a given
log entry (such as that of a transaction commit). Start a new write, or
wait and check if an already running write is covering the request.
Excerpt: shows how flushed_to_disk_lsn is advanced under O_DSYNC and where
the explicit flush is triggered when flush_to_disk is true.
NOTE(review): the parameter lsn and the local write_lsn are only used or
computed in the elided parts. */



void
log_write_up_to(
lsn_t lsn,
bool flush_to_disk)
{
......

log_sys->write_lsn = write_lsn;
#ifndef _WIN32
if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
/* O_SYNC means the OS does not buffer the log file, so what has just
been written counts as flushed: advance flushed_to_disk_lsn to write_lsn. */
log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
}
#endif /* !_WIN32 */
.....

/* Explicit flush requested by the caller. */
if (flush_to_disk) {
log_write_flush_to_disk_low();
}
}

/* Flushes the first log group via fil_flush() and records the flushed
position in log_sys->flushed_to_disk_lsn. The flush is skipped when the
flush method is O_DSYNC; on Windows it is always performed.
NOTE(review): the tail of the function is elided. */
static
void
log_write_flush_to_disk_low()
{
ut_a(log_sys->n_pending_flushes == 1); /* No other threads here */

#ifndef _WIN32 //do_flush is false when the flush method is O_DSYNC, true otherwise
bool do_flush = srv_unix_file_flush_method != SRV_UNIX_O_DSYNC;
#else
bool do_flush = true;
#endif
if (do_flush) {
log_group_t* group = UT_LIST_GET_FIRST(log_sys->log_groups);
fil_flush(group->space_id);//flush the log from the OS buffer to disk
log_sys->flushed_to_disk_lsn = log_sys->current_flush_lsn;
}
........
}