1#ifndef _GNU_SOURCE
2# define _GNU_SOURCE
3#endif
4
5#include <errno.h>
6#include <stdarg.h>
7#include <stdbit.h>
8#include <stdio.h>
9#include <stdint.h>
10#include <stdlib.h>
11#include <string.h>
12#include <threads.h>
13#include <sys/mman.h>
14
15#include "alignment.h"
16#include "allocator.h"
17#include "dump.h"
18#include "src/word.h"
19
20// unit size should not be less than size of pointer
21#define UNIT_SIZE 16
22
23// threads serialization
24static mtx_t lock;
25
26/****************************************************************
27 * Trace/debug output
28 */
29
30static void print_msg(const char* func_name, char* fmt, ...)
31{
32 fprintf(stderr, "Bitmap allocator -- %s: ", func_name);
33 va_list ap;
34 va_start(ap);
35 vfprintf(stderr, fmt, ap);
36 va_end(ap);
37}
38
39#define ERR(...) print_msg(__func__, __VA_ARGS__)
40
41#define SAY(...) do { if (pet_allocator.verbose) { print_msg(__func__, __VA_ARGS__); } } while(false)
42
43#ifdef DEBUG
44# define TRACE(...) do { if (pet_allocator.trace) { print_msg(__func__, __VA_ARGS__); } } while(false)
45#else
46# define TRACE(...)
47#endif
48
49
50/****************************************************************
51 * Stats
52 */
53
54static AllocatorStats stats = {};
55
56static atomic_size_t num_bm_pages = 0;
57
58/****************************************************************
59 * memory cleaning
60 */
61
62static void cleanse(void* addr, unsigned start, unsigned end)
63{
64 TRACE("addr=%p, start=%u, end=%u\n", addr, start, end);
65
66 unsigned length = end - start;
67
68 // clean bytes till start of word
69 uint8_t* byteptr = addr;
70 byteptr += start;
71 unsigned nbytes = start & (sizeof(Word) - 1);
72 if (nbytes) {
73 nbytes = sizeof(Word) - nbytes;
74 if (nbytes > length) {
75 nbytes = length;
76 }
77 TRACE("leading %u bytes\n", nbytes);
78 for (unsigned i = 0; i < nbytes; i++) {
79 *byteptr++ = 0;
80 length--;
81 }
82 }
83
84 // clean words
85 TRACE("%u words\n", length / sizeof(Word));
86 Word* wordptr = (Word*) byteptr;
87 while (length >= sizeof(Word)) {
88 *wordptr++ = 0;
89 length -= sizeof(Word);
90 }
91
92 if (length) {
93 // clean remaining bytes
94 TRACE("remaining %u bytes\n", length);
95 byteptr = (uint8_t*) wordptr;
96 while (length--) {
97 *byteptr++ = 0;
98 }
99 }
100}
101
102/****************************************************************
103 * mmap/mremap/munmap wrappers
104 */
105
106static void* call_mmap(unsigned size, bool clean)
107/*
108 * call mmap to allocate pages
109 *
110 * size must be multiple of sys_page_size
111 *
112 * bear in mind mmap called immediately after munmap may return dirty page,
113 * so explicit cleaning is a must
114 */
115{
116 void* result = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
117 if (result == MAP_FAILED) {
118 ERR("mmap: %s\n", strerror(errno));
119 return nullptr;
120 }
121 if (clean) {
122 cleanse(result, 0, size);
123 }
124 return result;
125}
126
127static inline void call_munmap(void* addr, unsigned size)
128{
129 if (munmap(addr, size) == -1) {
130 ERR("munmap(%p, %u): %s\n", addr, size, strerror(errno));
131 }
132}
133
134static void* call_mremap(void* addr, unsigned old_nbytes, unsigned new_nbytes, bool clean)
135/*
136 * old/new_nbytes are unaligned
137 */
138{
139 unsigned old_size = align_unsigned_to_page(old_nbytes);
140 unsigned new_size = align_unsigned_to_page(new_nbytes);
141 if (new_size == old_size) {
142 if (clean && new_nbytes > old_nbytes) {
143 cleanse(addr, old_nbytes, new_nbytes);
144 }
145 return addr;
146 }
147 int flags;
148 if (new_size > old_size) {
149 flags = MREMAP_MAYMOVE;
150 } else {
151 flags = 0;
152 clean = false; // don't clean when shrinking
153 }
154 void* new_addr = mremap(addr, old_size, new_size, flags);
155 if (new_addr == MAP_FAILED) {
156 ERR("mremap(%p, %u, %u): %s\n", addr, old_size, new_size, strerror(errno));
157 if (new_size > old_size) {
158 // grow failed
159 return nullptr;
160 } else {
161 // shrink failed, return same address
162 return addr;
163 }
164 }
165 if (clean) {
166 cleanse(new_addr, old_nbytes, new_nbytes);
167 }
168 return new_addr;
169}
170
171/****************************************************************
172 * bitmap allocator parameters
173 */
174
175static unsigned units_per_page; // sys_page_size / UNIT_SIZE
176
177static unsigned bm_page_header_size_in_units; // num_reserved_units ???
178/*
179 * Calculated as: (offsetof(BmPageHeader, bitmap)
180 * + units_per_page / 8 // size of bitmap in bytes
181 * + UNIT_SIZE - 1 // rounding
182 * ) / UNIT_SIZE
183 */
184
185static unsigned max_data_units; // sys_page_size - bm_page_header_size_in_units
186
187
188static inline unsigned bytes_to_units(unsigned nbytes)
189{
190 return align_unsigned(nbytes, UNIT_SIZE) / UNIT_SIZE;
191}
192
193/****************************************************************
194 * Bitmap allocator data page and superblock
195 */
196
197typedef struct _BmPageHeader {
198 /*
199 * On 4K page the header takes four 16-byte units, leaving 4032 bytes for data.
200 */
201 struct _BmPageHeader** volatile list;
202 struct _BmPageHeader* next;
203 struct _BmPageHeader* prev;
204
205 // variable part
206
207 // the size of bitmap depends on page size, for 4K it takes 32 bytes
208 Word bitmap[ /* sys_page_size() / UNIT_SIZE / WORD_WIDTH */ ];
209
210} BmPageHeader;
211
212
213static BmPageHeader* volatile lru_page = nullptr;
214/*
215 * last recently used page.
216 *
217 * Technically, this is a list, and functions that grab a page
218 * do not make a difference between this variable and superblock entry.
219 * However, LRU may contain single item only.
220 */
221
222static BmPageHeader** volatile superblock;
223/*
224 * Straightforward definition would be:
225 *
226 * BmPageHeader* superblock[ units_per_page ];
227 *
228 * The array contains pointers to bm_page lists grouped by their
229 * longest free block.
230 *
231 * Superblock cannot take more than one page.
232 * Usually it takes a half, if UNIT_SIZE is twice longer than size of pointer.
233 */
234
235static void dump_bm_page(BmPageHeader* bm_page)
236{
237 fprintf(stderr, "Page %p: list=%p, next=%p, prev=%p\n",
238 (void*) bm_page, (void*) bm_page->list, (void*) bm_page->next, (void*) bm_page->prev);
239 dump_bitmap(stderr, (uint8_t*)(bm_page->bitmap), units_per_page / 8);
240}
241
242static void dump()
243{
244 BmPageHeader** list = superblock;
245 fprintf(stderr, "\nAllocator bm pages: %zu, blocks allocated %zu\n",
246 num_bm_pages, stats.blocks_allocated);
247 if (lru_page) {
248 fprintf(stderr, "LRU page: %p\n", (void*) lru_page);
249 dump_bm_page(lru_page);
250 }
251 for (unsigned i = 0; i < units_per_page; i++, list++) {
252 BmPageHeader* first_page = *list;
253 if (first_page) {
254 fprintf(stderr, "Superblock entry %u: %p -> %p\n", i, (void*) list, (void*) first_page);
255 BmPageHeader* bm_page = first_page;
256 do {
257 dump_bm_page(bm_page);
258 bm_page = bm_page->next;
259 } while (bm_page != first_page);
260 }
261 }
262 fputc('\n', stderr);
263}
264
265/****************************************************************
266 * Basic bitmap functions
267 */
268
269static unsigned count_zero_bits(BmPageHeader* bm_page, unsigned offset, unsigned limit)
270/*
271 * Count consecutive zero bits in the bitmap starting from `offset` bit
272 * up to `limit`. The limit is treated as a hint when to stop, returned count can be greater.
273 */
274{
275 unsigned count = 0;
276 Word* ptr = &bm_page->bitmap[offset / WORD_WIDTH];
277
278 // count starting bits up to the the next word boundary
279 unsigned bit_index = offset & (WORD_WIDTH - 1);
280 if (bit_index) {
281 Word w = *ptr++;
282 w >>= bit_index;
283 if (w) {
284 // we have only ending bits
285 return stdc_trailing_zeros(w);
286 }
287 count = WORD_WIDTH - bit_index;
288 offset += count;
289 }
290
291 // count zero words
292 while (offset < units_per_page && count < limit) {
293 Word w = *ptr++;
294 if (w) {
295 // count ending bits
296 count += stdc_trailing_zeros(w);
297 break;
298 }
299 count += WORD_WIDTH;
300 offset += WORD_WIDTH;
301 }
302 return count;
303}
304
305static unsigned count_nonzero_bits(BmPageHeader* bm_page, unsigned offset, unsigned limit)
306/*
307 * Count consecutive nonzero bits in the bitmap starting from `offset` bit
308 * up to `limit`. The limit is treated as a hint when to stop, returned count can be greater.
309 *
310 * The code is exactly the same as in count_zero_bits, the only difference is inversion.
311 */
312{
313 unsigned count = 0;
314 Word* ptr = &bm_page->bitmap[offset / WORD_WIDTH];
315
316 // count starting bits up to the the next word boundary
317 unsigned bit_index = offset & (WORD_WIDTH - 1);
318 if (bit_index) {
319 Word w = ~*ptr++;
320 w >>= bit_index;
321 if (w) {
322 // we have only ending bits
323 return stdc_trailing_zeros(w);
324 }
325 count = WORD_WIDTH - bit_index;
326 offset += count;
327 }
328
329 // count all-one words
330 while (offset < units_per_page && count < limit) {
331 Word w = ~*ptr++;
332 if (w) {
333 // count ending bits
334 count += stdc_trailing_zeros(w);
335 break;
336 }
337 count += WORD_WIDTH;
338 offset += WORD_WIDTH;
339 }
340 return count;
341}
342
343static void set_bits(BmPageHeader* bm_page, unsigned offset, unsigned length)
344/*
345 * Set bits in the bitmap starting from offset.
346 */
347{
348 TRACE("bm_page=%p offset=%u length=%u\n", (void*) bm_page, offset, length);
349 Word* ptr = &bm_page->bitmap[offset / WORD_WIDTH];
350
351 // set starting bits up to the the next word boundary
352 unsigned bit_index = offset & (WORD_WIDTH - 1);
353 if (bit_index) {
354 Word bitmask = WORD_MAX;
355 unsigned num_bits = WORD_WIDTH - bit_index;
356 if (length <= num_bits) {
357 bitmask &= (((Word) 1) << length) - 1;
358 num_bits = length;
359 }
360 bitmask <<= bit_index;
361 *ptr++ |= bitmask;
362 offset += num_bits;
363 length -= num_bits;
364 }
365
366 // set remaining words
367 while (length >= WORD_WIDTH) {
368 *ptr++ = WORD_MAX;
369 length -= WORD_WIDTH;
370 }
371
372 // set ending bits
373 if (length) {
374 *ptr++ |= (((Word) 1) << length) - 1;
375 }
376}
377
378static void clear_bits(BmPageHeader* bm_page, unsigned offset, unsigned length)
379/*
380 * Clear bits in the bitmap starting from offset.
381 *
382 * The logic is the same as in set_bits.
383 */
384{
385 TRACE("bm_page=%p offset=%u length=%u\n", (void*) bm_page, offset, length);
386 Word* ptr = &bm_page->bitmap[offset / WORD_WIDTH];
387
388 // clear starting bits up to the the next word boundary
389 unsigned bit_index = offset & (WORD_WIDTH - 1);
390 if (bit_index) {
391 Word bitmask = WORD_MAX;
392 unsigned num_bits = WORD_WIDTH - bit_index;
393 if (length <= num_bits) {
394 bitmask &= (((Word) 1) << length) - 1;
395 num_bits = length;
396 }
397 bitmask <<= bit_index;
398 *ptr++ &= ~bitmask;
399 offset += num_bits;
400 length -= num_bits;
401 }
402
403 // clear remaining words
404 while (length >= WORD_WIDTH) {
405 *ptr++ = 0;
406 length -= WORD_WIDTH;
407 }
408
409 // clear ending bits
410 if (length) {
411 *ptr++ &= ~((((Word) 1) << length) - 1);
412 }
413}
414
415/****************************************************************
416 * Bitmap allocator functions
417 */
418
419static unsigned find_free_block(BmPageHeader* bm_page, unsigned block_size)
420/*
421 * Search for free block.
422 * Return offset of the first available block or 0 if no block is found.
423 * Given that first units of bm_page are always in use,
424 * offset can never be zero on success.
425 */
426{
427 unsigned offset = bm_page_header_size_in_units;
428 while (offset < units_per_page) {
429 unsigned length = count_zero_bits(bm_page, offset, block_size);
430 if (length >= block_size) {
431 TRACE("bm_page=%p block_size=%u -> offset=%u\n", (void*) bm_page, block_size, offset);
432 return offset;
433 }
434 offset += length;
435 offset += count_nonzero_bits(bm_page, offset, UINT_MAX);
436 }
437 TRACE("bm_page=%p block_size=%u -> 0\n", (void*) bm_page, block_size);
438 return 0;
439}
440
441static unsigned find_longest_free_block(BmPageHeader* bm_page)
442/*
443 * Search for the longest sequence of zero bits and return its length.
444 */
445{
446 unsigned offset = bm_page_header_size_in_units;
447 unsigned n = max_data_units;
448 unsigned lfb = 0;
449 while (n) {
450 unsigned length = count_zero_bits(bm_page, offset, n);
451 if (length > lfb) {
452 lfb = length;
453 }
454 offset += length;
455 n -= length;
456
457 length = count_nonzero_bits(bm_page, offset, n);
458 offset += length;
459 n -= length;
460 }
461 TRACE("bm_page=%p -> lfb=%u\n", (void*) bm_page, lfb);
462 return lfb;
463}
464
465static void add_to_list(BmPageHeader** list, BmPageHeader* bm_page)
466/*
467 * Add bm_page to circular doubly-linked list.
468 */
469{
470 BmPageHeader* first = *list;
471 if (first) {
472 // add to the end of list
473 bm_page->prev = first->prev;
474 bm_page->next = first;
475 first->prev->next = bm_page;
476 first->prev = bm_page;
477 } else {
478 // init list
479 *list = bm_page->next = bm_page->prev = bm_page;
480 }
481 bm_page->list = list;
482}
483
484static void delete_from_list(BmPageHeader* bm_page)
485/*
486 * Delete bm_page from circular doubly-linked list.
487 */
488{
489 BmPageHeader** list = bm_page->list;
490
491# ifdef DEBUG
492 if (!list) {
493 ERR("double call delete_from_list(%p)\n", (void*) bm_page);
494 abort();
495 }
496 if (list == &lru_page) {
497 TRACE("deleting page %p from LRU\n", (void*) bm_page);
498 } else {
499 TRACE("deleting page %p from superblock[%tu]\n", (void*) bm_page, list - superblock);
500 }
501# endif
502
503 if (bm_page->next == bm_page->prev) {
504 // last page, make list empty
505 *list = nullptr;
506 } else {
507 if (*list == bm_page) {
508 *list = bm_page->next;
509 }
510 bm_page->next->prev = bm_page->prev;
511 bm_page->prev->next = bm_page->next;
512 }
513 bm_page->list = nullptr;
514}
515
516static void add_to_superblock_entry(BmPageHeader* bm_page, unsigned lfb)
517/*
518 * Add page to superblock entry by `lfb`.
519 */
520{
521 mtx_lock(&lock);
522 TRACE("adding page %p to superblock[%tu]\n", (void*) bm_page, lfb);
523 add_to_list(&superblock[lfb], bm_page);
524 mtx_unlock(&lock);
525}
526
527static inline void unhand_page(BmPageHeader* bm_page)
528/*
529 * If `lru_page` is set, move it to superblock.
530 * Assign `bm_page` to `lru_page`.
531 */
532{
533 BmPageHeader* page_to_reclaim = nullptr;
534
535 mtx_lock(&lock);
536 if (lru_page) {
537 // have to scan while lock is held
538 unsigned lfb = find_longest_free_block(lru_page);
539
540 if (lfb < max_data_units) {
541 TRACE("adding lru_page %p to superblock[%tu]\n", (void*) lru_page, lfb);
542 add_to_list(&superblock[lfb], lru_page);
543 } else {
544 // okay to reclaim this page
545 page_to_reclaim = lru_page;
546 }
547 }
548 // now set lru_page
549 TRACE("adding bm_page %p to LRU\n", (void*) bm_page);
550 bm_page->list = (BmPageHeader** /* make compiller happy with volatile */) &lru_page;
551 lru_page = bm_page->next = bm_page->prev = bm_page;
552 mtx_unlock(&lock);
553
554 if (page_to_reclaim) {
555 TRACE("releasing page %p\n", (void*) page_to_reclaim);
556 call_munmap(page_to_reclaim, sys_page_size());
557 atomic_fetch_sub(&num_bm_pages, 1);
558 }
559}
560
561static inline BmPageHeader* bm_page_by_addr(void* addr)
562/*
563 * Get address of the bm_page from `addr`
564 */
565{
566 return (BmPageHeader*) (
567 ((ptrdiff_t) addr) & ~((ptrdiff_t) sys_page_size() - 1)
568 );
569}
570
571static void grab_page(BmPageHeader* bm_page)
572/*
573 * Prepare page for reallocating units
574 * in the context of current thread.
575 * The page is obtained by address using `bm_page_by_addr`.
576 *
577 * Take the page out of whatever list it belongs,
578 * either superblock or LRU.
579 */
580{
581# if DEBUG
582 unsigned msg_interval = 0;
583# endif
584
585 for (;;) {
586 mtx_lock(&lock);
587 if (bm_page->list) {
588 // the page belongs to some list
589 break;
590 }
591 // page is probably in use by other thread
592# if DEBUG
593 if (msg_interval == 0) {
594 TRACE("waiting for page %p to be released\n", (void*) bm_page);
595 msg_interval = 1000;
596 } else {
597 msg_interval--;
598 }
599# endif
600 mtx_unlock(&lock);
601 thrd_yield();
602 }
603# if DEBUG
604 if (bm_page->list == &lru_page) {
605 TRACE("taking page %p out of LRU\n", (void*) bm_page);
606 } else {
607 TRACE("taking page %p out of superblock[%tu]\n", (void*) bm_page, bm_page->list - superblock);
608 }
609# endif
610 delete_from_list(bm_page);
611 mtx_unlock(&lock);
612}
613
614static inline unsigned ptrdiff_to_units(void* addr, BmPageHeader* bm_page)
615// helper function for bm_shrink and bm_release invocation
616{
617 return (
618 ((uint8_t*) addr) - ((uint8_t*) bm_page)
619 ) / UNIT_SIZE;
620}
621
622#ifdef DEBUG
623 static void check_units_allocated(const char* func, BmPageHeader* bm_page,
624 unsigned offset, unsigned num_units)
625 {
626 unsigned n = count_nonzero_bits(bm_page, offset, num_units);
627 if (n < num_units) {
628 print_msg(func, "already released some units on bm_page %p starting from %u: in use %u of %u\n",
629 (void*) bm_page, offset, n, num_units);
630 }
631 }
632#endif
633
634static BmPageHeader* find_available_page(unsigned num_units, unsigned* offset)
635/*
636 * Find available page for new allocation.
637 *
638 * First, get LRU page. If missing, search superblock lists.
639 *
640 * When found, the page is removed either form LRU or from the superblock,
641 * so only current thread continue working with it while other threads
642 * can work with their own pages in parallel.
643 */
644{
645 BmPageHeader* bm_page = nullptr;
646
647 mtx_lock(&lock);
648
649 if (lru_page) {
650 bm_page = lru_page;
651 TRACE("taking page %p out of LRU\n", (void*) bm_page);
652 delete_from_list(bm_page);
653 mtx_unlock(&lock);
654
655 // find free block on the LRU page
656 *offset = find_free_block(bm_page, num_units);
657 if (*offset) {
658 return bm_page;
659 }
660 // LRU page has no space available, move it to superblock
661 add_to_superblock_entry(bm_page, find_longest_free_block(bm_page));
662
663 // continue with superblock
664 mtx_lock(&lock);
665 }
666 // XXX optimize for speed with bitmap?
667 // start searching from num_units position
668 BmPageHeader** list = &superblock[num_units];
669 unsigned lfb = num_units;
670 for (; lfb <= max_data_units; lfb++) {
671 bm_page = *list++;
672 if (bm_page) {
673 TRACE("taking page %p out of superblock[%tu]\n", (void*) bm_page, bm_page->list - superblock);
674 delete_from_list(bm_page);
675 goto found;
676 }
677 }
678 mtx_unlock(&lock);
679 return nullptr;
680
681found:
682 mtx_unlock(&lock);
683 *offset = find_free_block(bm_page, num_units);
684 if (*offset == 0) {
685 ERR("bm_page %p with LFB=%u must contain enough free space for %u units\n",
686 (void*) bm_page, bm_page->list - superblock, num_units);
687 abort();
688 }
689 return bm_page;
690}
691
692static void* bm_allocate(unsigned num_units, bool clean)
693/*
694 * Bitmap sub-allocator, should be called with num_units < max_data_units
695 */
696{
697 TRACE("num_units %u\n", num_units);
698
699 void* result = nullptr;
700 unsigned offset;
701 BmPageHeader* bm_page = find_available_page(num_units, &offset);
702 if (bm_page) {
703 // allocate
704 set_bits(bm_page, offset, num_units);
705 unhand_page(bm_page);
706 result = ((uint8_t*) bm_page) + offset * UNIT_SIZE;
707 goto out;
708 }
709
710 TRACE("allocating new page\n");
711
712 bm_page = call_mmap(sys_page_size(), false);
713 if (!bm_page) {
714 goto out;
715 }
716 // clean bitmap
717 Word* ptr = bm_page->bitmap;
718 for (unsigned i = 0, n = units_per_page / WORD_WIDTH; i < n; i++) {
719 *ptr++ = 0;
720 }
721 // mark reserved units and allocate units
722 set_bits(bm_page, 0, bm_page_header_size_in_units + num_units);
723
724 // give page away to LRU or superblock
725 unhand_page(bm_page);
726
727 atomic_fetch_add(&num_bm_pages, 1);
728 result = ((uint8_t*) bm_page) + bm_page_header_size_in_units * UNIT_SIZE;
729
730out:
731 if (result) {
732 atomic_fetch_add(&stats.blocks_allocated, 1);
733 }
734
735 if (result && clean) {
736 cleanse(result, 0, num_units * UNIT_SIZE);
737 }
738 TRACE("result=%p\n", result);
739 return result;
740}
741
742static void bm_shrink(BmPageHeader* bm_page, unsigned offset, unsigned old_num_units, unsigned new_num_units)
743{
744 TRACE("bm_page=%p, offset=%u, old_num_units=%u, new_num_units=%u\n",
745 (void*) bm_page, offset, old_num_units, new_num_units);
746
747 grab_page(bm_page);
748
749 unsigned tail_units = old_num_units - new_num_units;
750
751# ifdef DEBUG
752 check_units_allocated(__func__, bm_page, offset + new_num_units, tail_units);
753# endif
754 clear_bits(bm_page, offset + new_num_units, tail_units);
755
756 unhand_page(bm_page);
757}
758
759static bool bm_grow(BmPageHeader* bm_page, unsigned offset, unsigned old_num_units, unsigned new_num_units)
760{
761 TRACE("bm_page=%p, offset=%u, old_num_units=%u, new_num_units=%u\n",
762 (void*) bm_page, offset, old_num_units, new_num_units);
763
764 grab_page(bm_page);
765
766 unsigned increment = new_num_units - old_num_units;
767 unsigned length = count_zero_bits(bm_page, offset + old_num_units, increment);
768 if (length < increment) {
769 TRACE("available length %u is less than increment %u; need to move\n", length, increment);
770 unhand_page(bm_page);
771 return false;
772 }
773 set_bits(bm_page, offset + old_num_units, increment);
774
775 unhand_page(bm_page);
776 return true;
777}
778
779static void bm_release(BmPageHeader* bm_page, unsigned offset, unsigned num_units)
780{
781 TRACE("bm_page=%p, offset=%u, num_units=%u\n", (void*) bm_page, offset, num_units);
782
783 grab_page(bm_page);
784
785# ifdef DEBUG
786 check_units_allocated(__func__, bm_page, offset, num_units);
787# endif
788 clear_bits(bm_page, offset, num_units);
789
790 unhand_page(bm_page);
791 atomic_fetch_sub(&stats.blocks_allocated, 1);
792}
793
794/****************************************************************
795 * Allocator interface functions
796 */
797
798static void _init()
799{
800 // init page parameters
801
802 units_per_page = sys_page_size() / UNIT_SIZE;
803
804 bm_page_header_size_in_units = (
805 offsetof(BmPageHeader, bitmap)
806 + units_per_page / 8 // size of bitmap in bytes
807 + UNIT_SIZE - 1 // rounding
808 ) / UNIT_SIZE;
809
810 max_data_units = units_per_page - bm_page_header_size_in_units;
811
812 // allocate superblock
813
814 superblock = call_mmap(sys_page_size(), true);
815 if (!superblock) {
816 abort();
817 }
818
819 // init mutex
820 if (mtx_init(&lock, mtx_plain) != thrd_success) {
821 ERR("cannot init mutex\n");
822 }
823
824 SAY("page size %u; units per page: %u; header: %u units; data units: %u (%u bytes)\n",
825 sys_page_size(), units_per_page, bm_page_header_size_in_units, max_data_units, max_data_units * UNIT_SIZE);
826}
827
828
829static void* _allocate(unsigned nbytes, bool clean)
830{
831 TRACE("nbytes=%u\n", nbytes);
832
833 if (nbytes == 0) {
834 return nullptr;
835 }
836 unsigned num_units = bytes_to_units(nbytes);
837 if (num_units < max_data_units) {
838 // use bitmap sub-allocator for smaller blocks
839 return bm_allocate(num_units, clean);
840 } else {
841 // allocate pages directly
842 void* result = call_mmap(align_unsigned_to_page(nbytes), clean);
843 if (result) {
844 atomic_fetch_add(&stats.blocks_allocated, 1);
845 }
846 return result;
847 }
848}
849
850static void _release(void** addr_ptr, unsigned nbytes)
851{
852 void* addr = *addr_ptr;
853 if (!addr) {
854 return;
855 }
856
857 TRACE("addr=%p nbytes=%u\n", addr, nbytes);
858
859 if (nbytes == 0) {
860 ERR("called for %p with zero nbytes\n", addr);
861 abort();
862 }
863
864 BmPageHeader* bm_page = bm_page_by_addr(addr);
865 if (addr == (void*) bm_page) {
866 /*
867 * addr is aligned on page boundary, this means
868 * the block was allocated directly with mmap
869 */
870 call_munmap(addr, align_unsigned_to_page(nbytes));
871 atomic_fetch_sub(&stats.blocks_allocated, 1);
872
873 } else {
874 // use bitmap sub-allocator for smaller blocks
875 bm_release(bm_page, ptrdiff_to_units(addr, bm_page), bytes_to_units(nbytes));
876 }
877 *addr_ptr = nullptr;
878}
879
880static bool _reallocate(void** addr_ptr, unsigned old_nbytes, unsigned new_nbytes, bool clean, bool* addr_changed)
881{
882 if (old_nbytes == new_nbytes) {
883 goto success_same_addr;
884 }
885
886 void* addr = *addr_ptr;
887
888 TRACE("addr=%p old_nbytes=%u new_nbytes=%u\n", addr, old_nbytes, new_nbytes);
889
890 // shall we allocate new addr?
891 if (addr == nullptr) {
892 if (old_nbytes != 0) {
893 goto error;
894 }
895 addr = _allocate(new_nbytes, clean);
896 if (!addr) {
897 goto error;
898 }
899 *addr_ptr = addr;
900 goto success_changed_addr;
901 }
902
903 if (!(old_nbytes | new_nbytes)) {
904 // XXX might be a serious error, but it's a caller's problem
905 if (!old_nbytes) { ERR("called for %p with zero old_nbytes\n", addr); }
906 if (!new_nbytes) { ERR("called for %p with zero new_nbytes\n", addr); }
907 goto error;
908 }
909
910 unsigned new_num_units = bytes_to_units(new_nbytes);
911 unsigned old_num_units = bytes_to_units(old_nbytes);
912
913 if (new_num_units == old_num_units) {
914 if (clean && new_nbytes > old_nbytes) {
915 cleanse(addr, old_nbytes, new_nbytes);
916 }
917 goto success_same_addr;
918 }
919
920 BmPageHeader* bm_page = bm_page_by_addr(addr);
921
922 // shall we shrink?
923 if (new_num_units < old_num_units) {
924
925 if (new_num_units < max_data_units) {
926
927 // new block will use bitmap sub-allocator
928
929 if (old_num_units < max_data_units) {
930 // shrink using bitmap sub-allocator
931 if (addr == (void*) bm_page) {
932 ERR("address %p is not within data area\n", addr);
933 abort();
934 }
935 bm_shrink(bm_page, ptrdiff_to_units(addr, bm_page), old_num_units, new_num_units);
936 goto success_same_addr;
937 }
938
939 // shrinking block from page allocator to bitmap sub-allocator
940
941 if (addr != (void*) bm_page) {
942 ERR("address %p is not aligned on page boundary\n", addr);
943 abort();
944 }
945 void* new_block = bm_allocate(new_num_units, false);
946 if (!new_block) {
947 TRACE("falling back to remap\n");
948 goto remap;
949 }
950 memcpy(new_block, addr, new_nbytes);
951 bm_release(bm_page, ptrdiff_to_units(addr, bm_page), new_num_units);
952 *addr_ptr = new_block;
953 goto success_changed_addr;
954
955 } else {
956 // shrink using mremap
957 if (addr != (void*) bm_page) {
958 ERR("address %p is not aligned on page boundary\n", addr);
959 abort();
960 }
961 remap:
962 call_mremap(addr, old_nbytes, new_nbytes, false);
963 goto success_same_addr;
964 }
965 }
966
967 // grow
968
969 if (old_num_units < max_data_units) {
970
971 if (new_num_units < max_data_units) {
972
973 // grow using bitmap sub-allocator
974
975 if (addr == (void*) bm_page) {
976 ERR("address %p is not within data area\n", addr);
977 abort();
978 }
979 // try to grow within the same page
980 if(bm_grow(bm_page, ptrdiff_to_units(addr, bm_page), old_num_units, new_num_units)) {
981 if (clean) {
982 cleanse(addr, old_nbytes, new_nbytes);
983 }
984 goto success_same_addr;
985 }
986 }
987
988 // reallocate block
989
990 void* new_block = _allocate(new_nbytes, false);
991 if (!new_block) {
992 goto error;
993 }
994 memcpy(new_block, addr, old_nbytes);
995 _release(&addr, old_nbytes);
996 if (clean) {
997 cleanse(new_block, old_nbytes, new_nbytes);
998 }
999 *addr_ptr = new_block;
1000 if (addr_changed) { *addr_changed = new_block != addr; }
1001 return true;
1002
1003 } else {
1004 // grow using mremap
1005 if (addr != (void*) bm_page) {
1006 ERR("address %p is not aligned on page boundary\n", addr);
1007 abort();
1008 }
1009 void* new_addr = call_mremap(addr, old_nbytes, new_nbytes, clean);
1010 if (!new_addr) {
1011 goto error;
1012 }
1013 *addr_ptr = new_addr;
1014 if (addr_changed) { *addr_changed = new_addr != addr; }
1015 return true;
1016 }
1017
1018success_changed_addr:
1019 if (addr_changed) { *addr_changed = true; }
1020 return true;
1021
1022success_same_addr:
1023 if (addr_changed) { *addr_changed = false; }
1024 return true;
1025
1026error:
1027 if (addr_changed) { *addr_changed = false; }
1028 return false;
1029}
1030
1031Allocator pet_allocator = {
1032 .init = _init,
1033 .allocate = _allocate,
1034 .reallocate = _reallocate,
1035 .release = _release,
1036 .dump = dump,
1037 .trace = false,
1038 .verbose = false,
1039 .stats = &stats
1040};