glibc (2.26) free: Internals and Source Code Analysis

The previous article covered the basic concepts of the Linux heap and the allocation logic under glibc 2.26. This article continues from the glibc 2.26 source code and walks through how the Linux heap frees memory.

__libc_free

The free function first invokes __free_hook if it is set; otherwise it decides whether the chunk was allocated by mmap, calling munmap_chunk if it was and _int_free if it was not.

void
__libc_free (void *mem)
{
  mstate ar_ptr;
  mchunkptr p;                              /* chunk corresponding to mem */

  /* If __free_hook is set, call it and return.  */
  void (*hook) (void *, const void *) = atomic_forced_read (__free_hook);
  if (__builtin_expect (hook != NULL, 0))
    {
      (*hook)(mem, RETURN_ADDRESS (0));
      return;
    }

  if (mem == 0)                             /* free(0) has no effect */
    return;

  /* Convert the user-data pointer into a pointer to the chunk header.  */
  p = mem2chunk (mem);

  /* Was the chunk allocated by mmap?  */
  if (chunk_is_mmapped (p))                 /* release mmapped memory. */
    {
      /* See if the dynamic brk/mmap threshold needs adjusting.
         Dumped fake mmapped chunks do not affect the threshold.  */
      if (!mp_.no_dyn_threshold
          && chunksize_nomask (p) > mp_.mmap_threshold
          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
          && !DUMPED_MAIN_ARENA_CHUNK (p))
        {
          mp_.mmap_threshold = chunksize (p);
          mp_.trim_threshold = 2 * mp_.mmap_threshold;
          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
                      mp_.mmap_threshold, mp_.trim_threshold);
        }
      /* Release the mmap-allocated chunk via munmap_chunk.  */
      munmap_chunk (p);
      return;
    }

  /* Initialize tcache first if it is not set up yet.  */
  MAYBE_INIT_TCACHE ();

  ar_ptr = arena_for_chunk (p);   /* the malloc_state this chunk belongs to */
  _int_free (ar_ptr, p, 0);       /* hand off to _int_free for the real work */
}
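
To make mem2chunk concrete, here is a minimal sketch (my own illustration, not glibc code) of the pointer arithmetic on a typical 64-bit build, where the chunk header starts 2 * SIZE_SZ = 16 bytes before the user pointer. Reading heap metadata like this is undefined behavior in portable C and is for demonstration only:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    void *mem = malloc(0x20);
    /* prev_size and size sit immediately before the user data.  */
    size_t *chunk = (size_t *)((char *)mem - 2 * sizeof(size_t));
    printf("user pointer : %p\n", mem);
    printf("chunk header : %p\n", (void *)chunk);
    printf("size field   : %#zx\n", chunk[1]);   /* size | flag bits */
    free(mem);
    return 0;
}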

munmap_chunk

munmap_chunk steps back prev_size(p) bytes to the start of the underlying mapping, then releases the whole region with the __munmap system call. Since every mmap-allocated chunk is an independent mapping, prev_size here only records padding in front of the chunk, so in most cases only p itself gets unmapped.

static void
internal_function
munmap_chunk (mchunkptr p)
{
  INTERNAL_SIZE_T size = chunksize (p);

  assert (chunk_is_mmapped (p));

  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
     main arena.  We never free this memory.  */
  if (DUMPED_MAIN_ARENA_CHUNK (p))
    return;

  uintptr_t block = (uintptr_t) p - prev_size (p);   /* start of the mapping */
  size_t total_size = prev_size (p) + size;          /* total bytes to unmap */
  /* Unfortunately we have to do the compilers job by hand here.  Normally
     we would test BLOCK and TOTAL-SIZE separately for compliance with the
     page size.  But gcc does not recognize the optimization possibility
     (in the moment at least) so we combine the two values into one before
     the bit test.  */
  if (__builtin_expect (((block | total_size) & (GLRO (dl_pagesize) - 1)) != 0, 0))
    malloc_printerr ("munmap_chunk(): invalid pointer");

  atomic_decrement (&mp_.n_mmaps);
  atomic_add (&mp_.mmapped_mem, -total_size);   /* subtract total_size from mmapped_mem */

  /* If munmap failed the process virtual memory address space is in a
     bad shape.  Just leave the block hanging around, the process will
     terminate shortly anyway since not much can be done.  */
  __munmap ((char *) block, total_size);        /* the actual system call */
}
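
As a quick demonstration of the mmap path (a sketch assuming the default 64-bit mmap_threshold of 128 KiB): a sufficiently large request is served by mmap, the IS_MMAPPED bit (0x2) is set in the chunk's size field, and free takes the munmap_chunk path:

#include <stdio.h>
#include <stdlib.h>

#define IS_MMAPPED 0x2   /* second-lowest flag bit of the size field */

int main(void)
{
    void *big = malloc(0x40000);              /* 256 KiB > default threshold */
    size_t size_field = *((size_t *)big - 1); /* illustration only: UB */
    printf("IS_MMAPPED: %s\n", (size_field & IS_MMAPPED) ? "yes" : "no");
    free(big);                                /* goes through munmap_chunk */
    return 0;
}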

_int_free

At a high level, _int_free first tries to place the chunk in tcache; if that fails it tries the fast bins, and otherwise it consolidates the chunk with its free neighbors and puts it in the unsorted bin.

static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
  INTERNAL_SIZE_T size;        /* its size */
  mfastbinptr *fb;             /* associated fastbin */
  mchunkptr nextchunk;         /* next contiguous chunk */
  INTERNAL_SIZE_T nextsize;    /* its size */
  int nextinuse;               /* true if nextchunk is used */
  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
  mchunkptr bck;               /* misc temp for linking */
  mchunkptr fwd;               /* misc temp for linking */

  size = chunksize (p);

  /* Little security check which won't hurt performance: the
     allocator never wrapps around at the end of the address space.
     Therefore we can exclude some size values which might appear
     here by accident or by "design" from some intruder.  */
  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
      || __builtin_expect (misaligned_chunk (p), 0))
    malloc_printerr ("free(): invalid pointer");
  /* We know that each chunk is at least MINSIZE bytes in size or a
     multiple of MALLOC_ALIGNMENT.  */
  /* Sanity checks on the size.  */
  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
    malloc_printerr ("free(): invalid size");

  check_inuse_chunk (av, p);
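
The first check is easy to trigger. In this sketch, the pointer passed to free is not MALLOC_ALIGNMENT-aligned, so misaligned_chunk fires and the process aborts:

#include <stdlib.h>

int main(void)
{
    char *q = malloc(0x20);
    free(q + 1);   /* misaligned chunk pointer -> abort:
                      "free(): invalid pointer" */
    return 0;
}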

tcache

#if USE_TCACHE
  {
    size_t tc_idx = csize2tidx (size);

    /* If the matching tcache bin still has room, just put the chunk
       there and return.  */
    if (tcache
        && tc_idx < mp_.tcache_bins
        && tcache->counts[tc_idx] < mp_.tcache_count)
      {
        tcache_put (p, tc_idx);   /* push the chunk onto its tcache list */
        return;
      }
  }
#endif

Note that once a chunk goes into tcache the function returns right away; none of the consolidation logic below runs.
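
A small sketch of the count limit (assuming the default mp_.tcache_count of 7 and 64-bit sizes): the first seven frees of a given size land in tcache, and the eighth falls through to the fast bin logic below:

#include <stdlib.h>

int main(void)
{
    void *q[8];
    for (int i = 0; i < 8; i++)
        q[i] = malloc(0x60);   /* chunk size 0x70: tcache- and fastbin-eligible */
    for (int i = 0; i < 8; i++)
        free(q[i]);            /* q[0..6] fill the tcache bin (count == 7),
                                  q[7] falls through to the fast bin */
    return 0;
}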

fast bin

If the chunk's size falls within fast bin range, the chunk is pushed onto the corresponding fast bin:

  /*
    If eligible, place chunk on a fastbin so it can be found
    and used quickly in malloc.
  */
  /* If p's size is no larger than the fast bin maximum, put p in a fast bin.  */
  if ((unsigned long) (size) <= (unsigned long) (get_max_fast ())

#if TRIM_FASTBINS
      /*
        If TRIM_FASTBINS set, don't place chunks
        bordering top into fastbins
      */
      && (chunk_at_offset (p, size) != av->top)
#endif
      )
    {
      /* Sanity-check the size of the next chunk.  */
      if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ, 0)
          || __builtin_expect (chunksize (chunk_at_offset (p, size)) >= av->system_mem, 0))
        {
          bool fail = true;
          /* We might not have a lock at this point and concurrent modifications
             of system_mem might result in a false positive.  Redo the test after
             getting the lock.  */
          /* If the result above was obtained without holding the lock, a
             concurrent change to system_mem may have produced a false
             positive, so repeat the test with the lock held.  */
          if (!have_lock)
            {
              __libc_lock_lock (av->mutex);
              fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
                      || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
              __libc_lock_unlock (av->mutex);
            }

          if (fail)
            malloc_printerr ("free(): invalid next size (fast)");
        }
      /* Wipe p's user data (only when perturbation is enabled).  */
      free_perturb (chunk2mem (p), size - 2 * SIZE_SZ);

      atomic_store_relaxed (&av->have_fastchunks, true);  /* arena now has fast chunks */
      unsigned int idx = fastbin_index (size);
      fb = &fastbin (av, idx);

      /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
      mchunkptr old = *fb, old2;

      if (SINGLE_THREAD_P)
        {
          /* Check that the top of the bin is not the record we are going to
             add (i.e., double free).  */
          if (__builtin_expect (old == p, 0))
            malloc_printerr ("double free or corruption (fasttop)");
          /* Push p onto the fast bin list.  */
          p->fd = old;
          *fb = p;
        }
      else
        do
          {
            /* Check that the top of the bin is not the record we are going to
               add (i.e., double free).  */
            if (__builtin_expect (old == p, 0))
              malloc_printerr ("double free or corruption (fasttop)");
            /* Push p onto the fast bin list.  */
            p->fd = old2 = old;
          }
        while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);

      /* Check that size of fastbin chunk at the top is the same as
         size of the chunk that we are adding.  We can dereference OLD
         only if we have the lock, otherwise it might have already been
         allocated again.  */
      /* With the lock held, verify the new chunk went into the right
         fast bin.  */
      if (have_lock && old != NULL
          && __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
        malloc_printerr ("invalid fastbin entry (free)");
    }
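
The fasttop check only compares against the first chunk in the bin, so it catches two consecutive frees of the same chunk but not an A-B-A pattern. A sketch of triggering it on glibc 2.26, where the tcache bin for that size must be filled first or the frees never reach the fast bin path:

#include <stdlib.h>

int main(void)
{
    void *pad[7], *a;
    for (int i = 0; i < 7; i++)
        pad[i] = malloc(0x40);
    a = malloc(0x40);

    for (int i = 0; i < 7; i++)
        free(pad[i]);       /* fill the 0x50 tcache bin */

    free(a);                /* goes to the fast bin */
    free(a);                /* same chunk is at the fast bin top ->
                               "double free or corruption (fasttop)" */
    return 0;
}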

unsorted bin and consolidate

If the chunk cannot go into a fast bin, it is merged with adjacent free chunks and placed in the unsorted bin:

  /*
    Consolidate other non-mmapped chunks as they arrive.
  */
  /* p did not qualify for a fast bin and was not mmapped: merge it with
     adjacent free chunks and put it in the unsorted bin.  */
  else if (!chunk_is_mmapped (p))
    {

      /* If we're single-threaded, don't lock the arena.  */
      if (SINGLE_THREAD_P)
        have_lock = true;

      if (!have_lock)
        __libc_lock_lock (av->mutex);

      nextchunk = chunk_at_offset (p, size);

      /* Lightweight tests: check whether the block is already the
         top block.  */
      /* Security check: p must not be the top chunk.  */
      if (__glibc_unlikely (p == av->top))
        malloc_printerr ("double free or corruption (top)");
      /* Or whether the next chunk is beyond the boundaries of the arena.  */
      if (__builtin_expect (contiguous (av)
                            && (char *) nextchunk
                               >= ((char *) av->top + chunksize (av->top)), 0))
        malloc_printerr ("double free or corruption (out)");
      /* Or whether the block is actually not marked used.  */
      if (__glibc_unlikely (!prev_inuse (nextchunk)))
        malloc_printerr ("double free or corruption (!prev)");

      nextsize = chunksize (nextchunk);
      /* Sanity-check the next chunk's size.  */
      if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
          || __builtin_expect (nextsize >= av->system_mem, 0))
        malloc_printerr ("free(): invalid next size (normal)");
      /* Wipe p's user data (only when perturbation is enabled).  */
      free_perturb (chunk2mem (p), size - 2 * SIZE_SZ);

      /* consolidate backward */
      /* If the previous chunk is free, merge p backward into it.  */
      if (!prev_inuse (p))
        {
          prevsize = prev_size (p);
          size += prevsize;
          p = chunk_at_offset (p, -((long) prevsize));
          unlink (av, p, bck, fwd);   /* take the previous chunk off its bin list */
        }

      /* If the next chunk is not the top chunk, try to merge forward,
         then place the result in the unsorted bin.  */
      if (nextchunk != av->top)
        {
          /* get and clear inuse bit */
          nextinuse = inuse_bit_at_offset (nextchunk, nextsize);

          /* consolidate forward */
          if (!nextinuse)
            {
              unlink (av, nextchunk, bck, fwd);   /* take nextchunk off its bin list */
              size += nextsize;
            }
          else
            /* The next chunk stays in use: clear its PREV_INUSE bit
               to record that p is now free.  */
            clear_inuse_bit_at_offset (nextchunk, 0);

          /*
            Place the chunk in unsorted chunk list.  Chunks are
            not placed into regular bins until after they have
            been given one chance to be used in malloc.
          */
          /* Link p into the unsorted bin.  */
          bck = unsorted_chunks (av);
          fwd = bck->fd;
          if (__glibc_unlikely (fwd->bk != bck))
            malloc_printerr ("free(): corrupted unsorted chunks");
          p->fd = fwd;
          p->bk = bck;
          /* If the size is in large bin range, clear the nextsize pointers.  */
          if (!in_smallbin_range (size))
            {
              p->fd_nextsize = NULL;
              p->bk_nextsize = NULL;
            }
          bck->fd = p;
          fwd->bk = p;

          set_head (p, size | PREV_INUSE);
          set_foot (p, size);

          check_free_chunk (av, p);
        }

      /*
        If the chunk borders the current high end of memory,
        consolidate into top
      */
      else
        {
          size += nextsize;
          set_head (p, size | PREV_INUSE);
          av->top = p;
          check_chunk (av, p);
        }

      /*
        If freeing a large space, consolidate possibly-surrounding
        chunks.  Then, if the total unused topmost memory exceeds trim
        threshold, ask malloc_trim to reduce top.

        Unless max_fast is 0, we don't know if there are fastbins
        bordering top, so we cannot tell for sure whether threshold
        has been reached unless fastbins are consolidated.  But we
        don't want to consolidate on each free.  As a compromise,
        consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
        is reached.
      */
      /* If the merged chunk is large, consolidate the fast bins and
         consider shrinking the top chunk.  */
      if ((unsigned long) (size) >= FASTBIN_CONSOLIDATION_THRESHOLD)
        {
          if (atomic_load_relaxed (&av->have_fastchunks))
            malloc_consolidate (av);   /* merge all fast bin chunks */
          /* For the main arena, shrink the top chunk via systrim once
             it exceeds the trim threshold.  */
          if (av == &main_arena)
            {
#ifndef MORECORE_CANNOT_TRIM
              if ((unsigned long) (chunksize (av->top)) >=
                  (unsigned long) (mp_.trim_threshold))
                systrim (mp_.top_pad, av);
#endif
            }
          /* For a non-main arena, shrink via heap_trim instead.  */
          else
            {
              /* Always try heap_trim(), even if the top chunk is not
                 large, because the corresponding heap might go away.  */
              heap_info *heap = heap_for_ptr (top (av));

              assert (heap->ar_ptr == av);
              heap_trim (heap, mp_.top_pad);
            }
        }

      if (!have_lock)
        __libc_lock_unlock (av->mutex);
    }
  /*
    If the chunk was allocated via mmap, release via munmap().
  */
  else
    {
      munmap_chunk (p);
    }
}
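
A sketch of backward consolidation (sizes chosen so the chunks bypass tcache and fast bins on a 64-bit glibc 2.26; reading the freed chunk's size field is for illustration only):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    char *a = malloc(0x420);   /* chunk size 0x430: too big for tcache/fastbin */
    char *b = malloc(0x420);
    char *c = malloc(0x420);   /* barrier keeping b away from the top chunk */
    (void) c;

    free(a);                   /* a -> unsorted bin */
    free(b);                   /* prev_inuse(b) is clear, so b merges backward
                                  into a: one 0x860-byte unsorted-bin chunk */

    size_t merged = *((size_t *)a - 1) & ~(size_t) 0x7;   /* flags masked */
    printf("merged chunk size: %#zx\n", merged);          /* prints 0x860 */
    return 0;
}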

tcache_put

tcache_put itself performs almost no safety checks, so callers must ensure beforehand that tc_idx is valid and that the target tcache bin is not full:

/* Caller must ensure that we know tc_idx is valid and there's room
   for more chunks.  */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);  /* user-data view of the chunk */
  assert (tc_idx < TCACHE_MAX_BINS);                     /* sanity-check tc_idx */
  /* Push e onto the head of the singly linked list.  */
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  /* One more chunk in this bin.  */
  ++(tcache->counts[tc_idx]);
}
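
Because tcache_put has no double-free check (the e->key mitigation only appeared in glibc 2.29), freeing the same pointer twice on 2.26 quietly creates a self-loop in the bin, and the next two allocations of that size return the same address. A sketch:

#include <stdlib.h>

int main(void)
{
    void *a = malloc(0x40);
    free(a);                  /* tcache bin: a */
    free(a);                  /* tcache bin: a -> a -> ... no abort on 2.26 */

    void *b = malloc(0x40);   /* returns a */
    void *c = malloc(0x40);   /* returns a again */
    return (b == c) ? 0 : 1;  /* exits 0: the classic tcache double free */
}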

unlink

unlink removes a chunk P from its doubly linked bin list, but the only integrity check it performs is that the chunks before and after P still point back to P. If we can forge P's fd and bk (on 64-bit: fd = ptr - 0x18 and bk = ptr - 0x10, where ptr is a writable address that stores P), both checks read *ptr == P and pass, and the two unlink writes end up setting the value at ptr to ptr - 0x18.

/* Take a chunk off a bin list */
#define unlink(AV, P, BK, FD)                                                \
{                                                                            \
    /* Check that P's two size copies agree: the size stored in P itself     \
       and the prev_size the next chunk stores for P.  */                    \
    if (__builtin_expect (chunksize (P) != prev_size (next_chunk (P)), 0))   \
      malloc_printerr ("corrupted size vs. prev_size");                      \
    FD = P->fd;                                                              \
    BK = P->bk;                                                              \
    /* Check that the chunks before and after P both point to P.  */         \
    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))                    \
      malloc_printerr ("corrupted double-linked list");                      \
    else                                                                     \
      {                                                                      \
        /* Remove P from the list.  */                                       \
        FD->bk = BK;                                                         \
        BK->fd = FD;                                                         \
        /* For large bin chunks the nextsize pointers must be fixed too.  */ \
        if (!in_smallbin_range (chunksize_nomask (P))                        \
            && __builtin_expect (P->fd_nextsize != NULL, 0))                 \
          {                                                                  \
            /* Check that the neighboring nextsize pointers are sane.  */    \
            if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)       \
                || __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))   \
              malloc_printerr ("corrupted double-linked list (not small)");  \
            /* Unlink P from the nextsize list, case by case.  */            \
            if (FD->fd_nextsize == NULL)                                     \
              {                                                              \
                /* P was the only size in the list: FD becomes               \
                   self-linked ...  */                                       \
                if (P->fd_nextsize == P)                                     \
                  FD->fd_nextsize = FD->bk_nextsize = FD;                    \
                else                                                         \
                  {                                                          \
                    /* ... otherwise FD takes P's place.  */                 \
                    FD->fd_nextsize = P->fd_nextsize;                        \
                    FD->bk_nextsize = P->bk_nextsize;                        \
                    P->fd_nextsize->bk_nextsize = FD;                        \
                    P->bk_nextsize->fd_nextsize = FD;                        \
                  }                                                          \
              }                                                              \
            else                                                             \
              {                                                              \
                P->fd_nextsize->bk_nextsize = P->bk_nextsize;                \
                P->bk_nextsize->fd_nextsize = P->fd_nextsize;                \
              }                                                              \
          }                                                                  \
      }                                                                      \
}
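
Here is a self-contained simulation of that write primitive (purely illustrative: the chunk, the checks, and the writes are emulated by hand under an assumed 64-bit layout, rather than going through a real corrupted bin):

#include <stdio.h>

/* A chunk is modeled as an array of 8-byte words with fd at index 2 and
   bk at index 3; the global slot "ptr" stores the chunk's address, like
   a vulnerable program's global chunk list would.  All names here are
   hypothetical.  */
typedef unsigned long word;

word fake_chunk[6];                 /* prev_size, size, fd, bk, ... */
word *ptr = (word *) fake_chunk;    /* global slot holding the chunk */

int main(void)
{
    word *P = (word *) fake_chunk;

    /* Forge fd and bk so that FD->bk and BK->fd both read *(&ptr).  */
    P[2] = (word) ((char *) &ptr - 0x18);   /* fd = ptr_addr - 0x18 */
    P[3] = (word) ((char *) &ptr - 0x10);   /* bk = ptr_addr - 0x10 */

    word *FD = (word *) P[2];
    word *BK = (word *) P[3];

    /* unlink's check: FD->bk (offset 0x18) and BK->fd (offset 0x10)
       both dereference &ptr, whose content is P -> the check passes.  */
    if (FD[3] != (word) P || BK[2] != (word) P)
        return 1;                   /* "corrupted double-linked list" */

    /* unlink's writes: FD->bk = BK; BK->fd = FD;  */
    FD[3] = (word) BK;              /* *(&ptr) = &ptr - 0x10 */
    BK[2] = (word) FD;              /* *(&ptr) = &ptr - 0x18 */

    /* ptr now points 0x18 bytes below its own address.  */
    printf("ptr - &ptr = %td\n", (char *) ptr - (char *) &ptr);  /* -24 */
    return 0;
}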

malloc_consolidate

malloc_consolidate walks every chunk in every fast bin, merges each one with its adjacent free chunks, and places the result in the unsorted bin (or merges it into the top chunk):

/*
  ------------------------- malloc_consolidate -------------------------

  malloc_consolidate is a specialized version of free() that tears
  down chunks held in fastbins.  Free itself cannot be used for this
  purpose since, among other things, it might place chunks back onto
  fastbins.  So, instead, we need to use a minor variant of the same
  code.

  Also, because this routine needs to be called the first time through
  malloc anyway, it turns out to be the perfect place to trigger
  initialization code.
*/

static void malloc_consolidate (mstate av)
{
  mfastbinptr *fb;             /* current fastbin being consolidated */
  mfastbinptr *maxfb;          /* last fastbin (for loop control) */
  mchunkptr p;                 /* current chunk being consolidated */
  mchunkptr nextp;             /* next chunk to consolidate */
  mchunkptr unsorted_bin;      /* bin header */
  mchunkptr first_unsorted;    /* chunk to link to */

  /* These have same use as in free() */
  mchunkptr nextchunk;
  INTERNAL_SIZE_T size;
  INTERNAL_SIZE_T nextsize;
  INTERNAL_SIZE_T prevsize;
  int nextinuse;
  mchunkptr bck;
  mchunkptr fwd;

  /*
    If max_fast is 0, we know that av hasn't
    yet been initialized, in which case do so below
  */

  if (get_max_fast () != 0)   /* av has already been initialized */
    {
      /* The fast bins are about to be emptied, so clear the flag.  */
      atomic_store_relaxed (&av->have_fastchunks, false);

      unsorted_bin = unsorted_chunks (av);

      /*
        Remove each chunk from fast bin and consolidate it, placing it
        then in unsorted bin.  Among other reasons for doing this,
        placing in unsorted bin avoids needing to calculate actual bins
        until malloc is sure that chunks aren't immediately going to be
        reused anyway.
      */

      maxfb = &fastbin (av, NFASTBINS - 1);   /* fast bin with the largest size */
      fb = &fastbin (av, 0);                  /* fast bin with the smallest size */
      do
        {
          p = atomic_exchange_acq (fb, NULL); /* atomically grab the whole bin,
                                                 leaving it empty */
          if (p != 0)
            {
              do
                {
                  check_inuse_chunk (av, p);
                  nextp = p->fd;

                  /* Slightly streamlined version of consolidation code in free() */
                  size = chunksize (p);
                  nextchunk = chunk_at_offset (p, size);
                  nextsize = chunksize (nextchunk);

                  /* If the previous chunk is free, merge p backward into it.  */
                  if (!prev_inuse (p))
                    {
                      prevsize = prev_size (p);
                      size += prevsize;
                      p = chunk_at_offset (p, -((long) prevsize));
                      unlink (av, p, bck, fwd);
                    }

                  /* If the next chunk is not the top chunk ...  */
                  if (nextchunk != av->top)
                    {
                      nextinuse = inuse_bit_at_offset (nextchunk, nextsize);

                      if (!nextinuse)   /* the next chunk is free */
                        {
                          size += nextsize;
                          unlink (av, nextchunk, bck, fwd);
                          /* p's size grows and nextchunk leaves its bin:
                             the two chunks are now merged.  */
                        }
                      else
                        /* The next chunk stays in use: clear its
                           PREV_INUSE bit to record that p is free.  */
                        clear_inuse_bit_at_offset (nextchunk, 0);

                      /* Link p into the unsorted bin.  */
                      first_unsorted = unsorted_bin->fd;
                      unsorted_bin->fd = p;
                      first_unsorted->bk = p;

                      /* For a large-bin-sized chunk, clear the nextsize pointers.  */
                      if (!in_smallbin_range (size))
                        {
                          p->fd_nextsize = NULL;
                          p->bk_nextsize = NULL;
                        }

                      /* Set p's header, links and footer.  */
                      set_head (p, size | PREV_INUSE);
                      p->bk = unsorted_bin;
                      p->fd = first_unsorted;
                      set_foot (p, size);
                    }

                  /* ... otherwise merge p into the top chunk.  */
                  else
                    {
                      size += nextsize;
                      set_head (p, size | PREV_INUSE);
                      av->top = p;
                    }

                } while ((p = nextp) != 0);   /* walk every chunk in this bin */
            }
        } while (fb++ != maxfb);              /* walk every fast bin */
    }
  else   /* av has not been initialized yet: do it now */
    {
      malloc_init_state (av);
      check_malloc_state (av);
    }
}
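
One way to see malloc_consolidate run on glibc 2.26 is to fill a tcache bin, push a few chunks into a fast bin, and then request a large-bin-sized allocation, since _int_malloc consolidates the fast bins before serving large requests. A sketch with 64-bit default sizes:

#include <stdlib.h>

int main(void)
{
    void *pad[7], *f[3];
    for (int i = 0; i < 7; i++) pad[i] = malloc(0x40);
    for (int i = 0; i < 3; i++) f[i] = malloc(0x40);

    for (int i = 0; i < 7; i++) free(pad[i]);   /* fill the 0x50 tcache bin */
    for (int i = 0; i < 3; i++) free(f[i]);     /* these land in the fast bin */

    /* A large-bin-sized request: _int_malloc sees have_fastchunks and
       calls malloc_consolidate, merging the fast bin chunks and moving
       them to the unsorted bin (or into top).  */
    void *big = malloc(0x800);
    free(big);
    return 0;
}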

Summary

The overall flow of free: first check whether the chunk was allocated by mmap; if so, release it with munmap_chunk and return, otherwise enter _int_free. Inside _int_free, first check whether the chunk is in tcache range and the corresponding tcache bin still has room; if so, put the chunk in the tcache list and return immediately. Otherwise, check whether it is in fast bin range; if so, put it in the matching fast bin and finish. Otherwise, check once more whether the chunk was allocated by mmap and, if so, release it with munmap_chunk. If not, merge backward when the previous chunk is free. Then check whether the next chunk is the top chunk: if it is, merge the current chunk into top directly; if not, merge forward when the next chunk is free, and put the merged chunk in the unsorted bin. Finally, if the merged chunk exceeds FASTBIN_CONSOLIDATION_THRESHOLD, call malloc_consolidate to merge every chunk in the fast bins into the unsorted bin, and consider trimming the top chunk. The flow chart is shown below:

[Figure: flow chart of the overall free process]
