sofixer源码分析(一)-加载流程

Sofixer源码分析

项目入口在 main.cpp 的 main 函数中

1
2
3
4
5
6
7
8
/// Program entry point.
///
/// All real work is delegated to main_loop(). When it reports failure,
/// useage() is called (presumably to print usage help) before exiting.
///
/// @param argc number of command-line arguments, forwarded to main_loop().
/// @param argv command-line arguments, forwarded to main_loop().
/// @return 0 when main_loop() succeeds, -1 otherwise.
int main(int argc, char* argv[]) {
    if (!main_loop(argc, argv)) {
        // Failure path: show usage and signal an error to the caller.
        useage();
        return -1;
    }
    FLOGI("Done!!!");
    return 0;
}

而在main_loop中会设置基地址

1
// Pass `base` to the reader as the dump base address
// (presumably the address the .so was dumped from in memory; TODO confirm).
elf_reader.setDumpSoBaseAddr(base);

然后打开和加载so文件

1
2
3
4
5
6
7
8
9
10
11
12
// Point the reader at the input file; give up if setSource reports failure.
if (!elf_reader.setSource(source.c_str())) {
FLOGE("unable to open source file");
return false;
}
// The base .so name is optional: forward it only when one was supplied.
if (!baseso.empty()) {
elf_reader.setBaseSoName(baseso.c_str());
}

// Run the load pipeline; a false result is reported as an invalid source file.
if(!elf_reader.Load()) {
FLOGE("source so file is invalid");
return false;
}

我们重点关注下Load()的实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
/// Drives the whole load pipeline for a dumped .so file.
///
/// Steps, in order: read and verify the ELF header, read the program header
/// table, patch the program headers (FixDumpSoPhdr), optionally pull dynamic
/// information from the base .so, reserve the load buffer, load the segments,
/// locate the phdr, then apply the dynamic section (if taken from the base
/// .so) and the program header table.
///
/// @return true when every fallible step succeeded, false as soon as one fails.
bool ObElfReader::Load() {
    // Header parsing: each step is only attempted if the previous one passed.
    if (!ReadElfHeader()) {
        return false;
    }
    if (!VerifyElfHeader()) {
        return false;
    }
    if (!ReadProgramHeader()) {
        return false;
    }
    FixDumpSoPhdr();

    uint32_t base_dynamic_size = 0;
    bool has_base_dynamic_info = false;
    if (haveDynamicSectionInLoadableSegment()) {
        FLOGI("dynamic segment have been found in loadable segment, "
              "argument baseso will be ignored.");
    } else {
        // No usable dynamic segment in the dump itself: try to get the
        // dynamic information from the base so file instead.
        // TODO fix bug in dynamic section rebuild.
        LoadDynamicSectionFromBaseSource();
        has_base_dynamic_info = (dynamic_sections_ != nullptr);
        if (has_base_dynamic_info) {
            // Extra room requested from ReserveAddressSpace for these entries.
            base_dynamic_size = dynamic_count_ * sizeof(Elf_Dyn);
        }
    }

    if (!ReserveAddressSpace(base_dynamic_size)) {
        return false;
    }
    if (!LoadSegments()) {
        return false;
    }
    if (!FindPhdr()) {
        return false;
    }

    if (has_base_dynamic_info) {
        // Copy dynamic information to the end of the file.
        ApplyDynamicSection();
    }
    ApplyPhdrTable();

    return true;
}

其中 ReadElfHeader() 的实现如下。source_ 是一个封装了文件处理功能的对象,header_ 是 ElfReader 的成员变量,通过 source_->Read(&header_, sizeof(header_)) 这一行就可以把 ELF 头读入 header_。

1
2
3
4
5
6
7
8
/// Reads the ELF header from source_ into the header_ member.
///
/// @return true when exactly sizeof(header_) bytes were read; false (after
///         logging) when the source yielded any other amount.
bool ElfReader::ReadElfHeader() {
    const auto bytes_read = source_->Read(&header_, sizeof(header_));
    if (bytes_read == sizeof(header_)) {
        return true;
    }
    // A short read means the file cannot even hold a complete ELF header.
    FLOGE("\"%s\" is too small to be an ELF executable", name_);
    return false;
}

verify 比较简单,略过。继续看 ReadProgramHeader():这个函数先用 new 分配一块与程序头表等大(phdr_num_ * sizeof(Elf_Phdr) 字节)的缓冲区 mmap_result,并从文件偏移 e_phoff 处把程序头表读入其中;然后把它赋值给成员变量 phdr_mmap_,用来保存这块空间的起始地址;最后再强制类型转换为 Elf_Phdr* 并赋值给成员变量 phdr_table_。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/// Reads the program header table described by header_ into a heap buffer.
///
/// On success the members phdr_num_, phdr_size_, phdr_mmap_ and phdr_table_
/// describe a buffer of e_phnum entries read from file offset e_phoff.
/// The buffer is allocated with new[] and ownership passes to phdr_mmap_.
///
/// @return true on success; false (after logging) when e_phnum is out of
///         range or the table cannot be read. On failure no buffer is leaked.
bool ElfReader::ReadProgramHeader() {
    phdr_num_ = header_.e_phnum;

    // Like the kernel, we only accept program header tables that
    // are smaller than 64KiB.
    if (phdr_num_ < 1 || phdr_num_ > 65536/sizeof(Elf_Phdr)) {
        FLOGE("\"%s\" has invalid e_phnum: %zu", name_, phdr_num_);
        return false;
    }

    phdr_size_ = phdr_num_ * sizeof(Elf_Phdr);

    // Keep the typed pointer so the buffer can be released with delete[] on
    // the failure path (deleting through void* would be undefined behaviour).
    uint8_t* table_bytes = new uint8_t[phdr_size_];
    // NOTE(review): the result of Read is only tested for truthiness here,
    // unlike ReadElfHeader which compares a byte count — confirm this
    // overload reports short reads as failure.
    if (!source_->Read(table_bytes, phdr_size_, header_.e_phoff)) {
        FLOGE("\"%s\" has no valid phdr data", name_);
        delete[] table_bytes;  // previously leaked on this path
        return false;
    }

    phdr_mmap_ = table_bytes;
    phdr_table_ = reinterpret_cast<Elf_Phdr*>(table_bytes);

    return true;
}

FixDumpSoPhdr()

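当 dump_so_base_ 不为 0 时,首先将 PT_LOAD 类型的段收集起来并按 p_vaddr 从小到大排序,然后把相邻两个 LOAD 段的首地址之差作为前一个段的大小(p_memsz),最后一个 LOAD 段的大小则取 file_size - p_vaddr。之后无论 dump_so_base_ 是否为 0,都会遍历整个程序头表,把每一项的 p_paddr、p_offset 设置为 p_vaddr,并把 p_filesz 扩展为 p_memsz。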

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
void ObElfReader::FixDumpSoPhdr() {
// some shell will release data between loadable phdr(s), just load all memory data
if (dump_so_base_ != 0) {
std::vector<Elf_Phdr*> loaded_phdrs;
for (auto i = 0; i < phdr_num_; i++) {
auto phdr = &phdr_table_[i];
if(phdr->p_type != PT_LOAD) continue;
loaded_phdrs.push_back(phdr);
}
std::sort(loaded_phdrs.begin(), loaded_phdrs.end(),
[](Elf_Phdr * first, Elf_Phdr * second) {
return first->p_vaddr < second->p_vaddr;
});
if (!loaded_phdrs.empty()) {
for (unsigned long i = 0, total = loaded_phdrs.size(); i < total; i++) {
auto phdr = loaded_phdrs[i];
if (i != total - 1) {
// to next loaded segament
auto nphdr = loaded_phdrs[i+1];
phdr->p_memsz = nphdr->p_vaddr - phdr->p_vaddr;
} else {
// to the file end
phdr->p_memsz = file_size - phdr->p_vaddr;
}
phdr->p_filesz = phdr->p_memsz;
}
}
}

auto phdr = phdr_table_;
for(auto i = 0; i < phdr_num_; i++) {
phdr->p_paddr = phdr->p_vaddr;
phdr->p_filesz = phdr->p_memsz; // expend filesize to memsiz
phdr->p_offset = phdr->p_vaddr; // since elf has been loaded. just expand file data to dump memory data
// phdr->p_flags = 0 // TODO fix flags by PT_TYPE
phdr++;
}
}

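接下来是 haveDynamicSectionInLoadableSegment():遍历 phdr_table_,如果段类型不是 PT_DYNAMIC 就跳过;找到第一个 PT_DYNAMIC 段后,判断它是否完全落在可加载段的地址范围 (min_vaddr, max_vaddr) 之内,是则返回 true,否则返回 false。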

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
bool ObElfReader::haveDynamicSectionInLoadableSegment() {
Elf_Addr min_vaddr, max_vaddr;
phdr_table_get_load_size(phdr_table_, phdr_num_, &min_vaddr, &max_vaddr);

const Elf_Phdr* phdr = phdr_table_;
const Elf_Phdr* phdr_limit = phdr + phdr_num_;

for (phdr = phdr_table_; phdr < phdr_limit; phdr++) {
if (phdr->p_type != PT_DYNAMIC) {
continue;
}
if (phdr->p_vaddr > min_vaddr && (phdr->p_vaddr + phdr->p_memsz) < max_vaddr) {
return true;
}
break;
}
return false;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/// Allocates the zero-filled buffer that the loadable segments are copied into.
///
/// The buffer holds load_size_ bytes (as computed by
/// phdr_table_get_load_size) plus padding_size extra bytes. load_start_ is
/// set to the buffer and load_bias_ is chosen so that
/// load_bias_ + min_vaddr == load_start_.
///
/// @param padding_size extra bytes to allocate after the load area.
/// @return false (after logging) when there are no loadable segments,
///         true otherwise.
bool ElfReader::ReserveAddressSpace(uint32_t padding_size) {
    Elf_Addr min_vaddr;
    load_size_ = phdr_table_get_load_size(phdr_table_, phdr_num_, &min_vaddr);
    if (load_size_ == 0) {
        FLOGE("\"%s\" has no loadable segments", name_);
        return false;
    }
    pad_size_ = padding_size;

    const uint32_t alloc_size = load_size_ + pad_size_;

    // alloc map data, and load in addr; the trailing () value-initialises
    // the array, i.e. every byte starts out as zero.
    uint8_t* const buffer = new uint8_t[alloc_size]();
    load_start_ = buffer;

    // the first loaded phdr data should be loaded in the start of load_start
    // (load_bias_ + phdr.vaddr), so load_bias_ = load_start - phdr.vaddr(min_addr)
    const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);
    const uintptr_t lowest_vaddr =
        reinterpret_cast<uintptr_t>(reinterpret_cast<uint8_t*>(min_vaddr));
    load_bias_ = reinterpret_cast<uint8_t*>(buffer_addr - lowest_vaddr);
    return true;
}

LoadSegments() 遍历程序头表,把其中每个 PT_LOAD 段的文件内容逐个读入前面分配好的缓冲区

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
bool ElfReader::LoadSegments() {
// TODO fix file dada load error, file data between LOAD seg should be loaded
for (size_t i = 0; i < phdr_num_; ++i) {
const Elf_Phdr* phdr = &phdr_table_[i];

if (phdr->p_type != PT_LOAD) {
continue;
}

// Segment addresses in memory.
Elf_Addr seg_start = phdr->p_vaddr;
Elf_Addr seg_end = seg_start + phdr->p_memsz;

// Elf_Addr seg_page_start = PAGE_START(seg_start);
// Elf_Addr seg_page_end = PAGE_END(seg_end);

Elf_Addr seg_file_end = seg_start + phdr->p_filesz;

// File offsets.
Elf_Addr file_start = phdr->p_offset;
Elf_Addr file_end = file_start + phdr->p_filesz;

// Elf_Addr file_page_start = PAGE_START(file_start);
Elf_Addr file_length = file_end - file_start;


if (file_length != 0) {
// memory data loading
void* load_point = seg_start + reinterpret_cast<uint8_t *>(load_bias_);
if(!source_->Read(load_point, file_length, file_start)) {
FLOGE("couldn't map \"%s\" segment %zu: %s", name_, i, strerror(errno));
return false;
}

}

// if the segment is writable, and does not end on a page boundary,
// zero-fill it until the page limit.
// if ((phdr->p_flags & PF_W) != 0 && PAGE_OFFSET(seg_file_end) > 0) {
// memset(seg_file_end + reinterpret_cast<uint8_t *>(load_bias_), 0, PAGE_SIZE - PAGE_OFFSET(seg_file_end));
// }

// seg_file_end = PAGE_END(seg_file_end);

// seg_file_end is now the first page address after the file
// content. If seg_end is larger, we need to zero anything
// between them. This is done by using a private anonymous
// map for all extra pages.
// since data has been clear, just skip this step
// if (seg_page_end > seg_file_end) {
// void* load_point = (uint8_t*)load_bias_ + seg_file_end;
// memset(load_point, 0, seg_page_end - seg_file_end);
// }
}
return true;
}

sofixer源码分析(一)-加载流程
http://showfaker.top/2024/03/12/sofixer-load/
作者
ShowFaker
发布于
2024年3月12日
许可协议