CTF Angr 符号执行学习(一)
Jsjsj Lv2

CTF Angr 符号执行学习(一)

前言:最近对自动化二进制漏洞挖掘哼感兴趣,百度了下,发现自动化二进制漏洞挖掘的一个核心技术之一Angr符号执行,感觉很有意思再加上之前比赛也有很多关于Angr的题目,angr也可以对抗代码混淆ollvm这些,感觉以后用处蛮大的,于是就学了学,这里记录下笔记,发到博客里和微信公众号里供大家学习,学了一天,才刚学到07文件符号执行化,就分别总结成了一、二、三篇,太菜了

一、00

我们要用到两个模块,一个angr和一个sys

angr进行符号执行,sys来进行用户输入输出

我们第一步要进行angr脚本去跑约束逆向的时候,

第一步:

1.创建文件项目:

proj=angr.Project(binary,auto_load_libs=False),不加载libc

2.初始化地址块

state=proj.factory.entry_state() 这里的块初始化到了程序的start 块

3.初始化以后,我们要对这个块进行模拟

sim=proj.factory.simgr(state)

4.模拟后,开始寻找正确的路径

ok_address=0x8048675
sim.explore(find=ok_address) 这里是我们输出正确的路径

5.搜索到路径后,把状态放到sim.found[0]里,然后从这个状态里取出来即可

if sim.found:
    solution_state=sim.found[0]
    print(solution_state.posix.dumps(sys.stdin.fileno()).decode())
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import angr
import sys
def solver():
binary="./00_angr_find"
proj=angr.Project(binary,auto_load_libs=False)

state=proj.factory.entry_state()
sim=proj.factory.simgr(state)

ok_address=0x8048675
sim.explore(find=ok_address)
if sim.found:
solution_state=sim.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()).decode())

else:
print("no!!!!")


if __name__=='__main__':
solver()

二、01

这个案例,前面部分跟00一样,在寻找路径的时候,explore的时候加个avoid参数,这个参数就避开错误代码块的参数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import angr
import sys

def solver():
binary="./01_angr_avoid"
proj=angr.Project(binary)
state=proj.factory.entry_state()
sim=proj.factory.simgr(state)

ok_address=0x80485E0
bi_kai=0x80485A8
sim.explore(find=ok_address,avoid=bi_kai)
if sim.found:
solution_state=sim.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()).decode())

else:
print("no !!!!!")


if __name__=='__main__':
solver()

三、02

这里刚上面一样,唯一的区别就是,自己写了一个避开的函数和正确的函数,赋给了find和avoid,效果都是一样的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import angr
import sys

def find_path(state):
return b'Good Job.' in state.posix.dumps(sys.stdout.fileno())


def avoid_path(state):
return b'Try again.' in state.posix.dumps(sys.stdout.fileno())
def solver():
binary="./02_angr_find_condition"
proj=angr.Project(binary)
init_state=proj.factory.entry_state()
sim=proj.factory.simgr(init_state)

ok_address=0x08048715
sim.explore(find=find_path,avoid=avoid_path)
if sim.found:
solution_state=sim.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()).decode())

else:
raise Exception('Could not find the solution')



if __name__=='__main__':
solver()

四、03 regs 寄存器

这里就有特点了,对寄存器的操作

我们看题目代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
int __cdecl main(int argc, const char **argv, const char **envp)
{
__int64 user_input; // rax
int v5; // [esp+4h] [ebp-14h]
int v6; // [esp+8h] [ebp-10h]
int v7; // [esp+Ch] [ebp-Ch]
int v8; // [esp+Ch] [ebp-Ch]

printf("Enter the password: ");
user_input = get_user_input();
v7 = HIDWORD(user_input);
v5 = complex_function_1(user_input);
v6 = complex_function_2();
v8 = complex_function_3(v7);
if ( v5 || v6 || v8 )
puts("Try again.");
else
puts("Good Job.");
return 0;
}

get_user_input:

1
2
3
4
5
6
7
8
9
10
int get_user_input()
{
int v1; // [esp+0h] [ebp-18h] BYREF
int v2; // [esp+4h] [ebp-14h] BYREF
int v3[4]; // [esp+8h] [ebp-10h] BYREF

v3[1] = __readgsdword(0x14u);
__isoc99_scanf("%x %x %x", &v1, &v2, v3);
return v1;
}

通过上面两行代码我们发现,在getuserinput这个函数里,对v1、v2、v3进行了输入,然后只用了v1,然后看汇编

1
2
3
4
5
6
7
8
9
10
11
0804892A 68 93 8A 04 08                push    offset aXXX                     ; "%x %x %x"
.text:0804892F E8 9C FA FF FF call ___isoc99_scanf
.text:0804892F
.text:08048934 83 C4 10 add esp, 10h
.text:08048937 8B 4D E8 mov ecx, [ebp+var_18]
.text:0804893A 89 C8 mov eax, ecx
.text:0804893C 8B 4D EC mov ecx, [ebp+var_14]
.text:0804893F 89 CB mov ebx, ecx
.text:08048941 8B 4D F0 mov ecx, [ebp+var_10]
.text:08048944 89 CA mov edx, ecx
.text:08048946 90 nop

发现三个值传给 了eax ebx ecx,我们可以在输入函数之后,利用向量给 initial_state.regs.eax ebx edx,进行向量的赋值,在后面进行状态查看即可,分析完后,我这里对关键的exp 代码进行分析

(1).start_address=0x08048980
initial_state=proj.factory.blank_state(addr=start_address) 这里在输入函数之后开始空白块,我这里理解的空白块是从这里开始进行向下执行,并且绕过程序自己的输入函数,我们自己进行对reg来操作赋值

(2).

1
2
3
4
5
6
7
passwd_0=claripy.BVS('passwd_0',32)
passwd_1=claripy.BVS('passwd_1',32)
passwd_2=claripy.BVS('passwd_2',32)

initial_state.regs.eax=passwd_0
initial_state.regs.ebx=passwd_1
initial_state.regs.edx=passwd_2

用claripy进行向量的创建,创建3个向量,因为是int32位,所以这里后面写成了32,然后再把三个向量导入到eax ebx edx

(3).

1
2
simgr=proj.factory.simgr(initial_state)
simgr.explore(find=find_path,avoid=avoid_path)

然后进行模拟执行并且查询

(4) .

然后通过simgr.found获取解决后的state(状态),

1
2
3
solution0=solution_state.solver.eval(passwd_0)
solution1=solution_state.solver.eval(passwd_1)
solution2=solution_state.solver.eval(passwd_2)

然后再用solution_state.solver.eval获取状态值,获取到状态值打印即可

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import angr
import sys
import claripy

def find_path(state):
return b'Good Job.' in state.posix.dumps(sys.stdout.fileno())

def avoid_path(state):
return b'Try again.' in state.posix.dumps(sys.stdout.fileno())



def solver():
binary="./03_angr_symbolic_registers"
proj=angr.Project(binary)
start_address=0x08048980
initial_state=proj.factory.blank_state(addr=start_address)

passwd_0=claripy.BVS('passwd_0',32)
passwd_1=claripy.BVS('passwd_1',32)
passwd_2=claripy.BVS('passwd_2',32)

initial_state.regs.eax=passwd_0
initial_state.regs.ebx=passwd_1
initial_state.regs.edx=passwd_2

simgr=proj.factory.simgr(initial_state)
simgr.explore(find=find_path,avoid=avoid_path)
if simgr.found:
solution_state=simgr.found[0]
solution0=solution_state.solver.eval(passwd_0)
solution1=solution_state.solver.eval(passwd_1)
solution2=solution_state.solver.eval(passwd_2)

print('passwd0:{}'.format(hex(solution0)))
print('passwd1:{}'.format(hex(solution1)))
print('passwd2:{}'.format(hex(solution2)))

else:
raise Exception("Could not find the solution!")



if __name__=='__main__':
solver()

五、04 stack

这里是对stack的一个符号化,对栈的化,就得注意栈的分布了,esp和ebp寄存器这些

我们还是选关键代码进行分析

(1)。

1
2
3
4
5
6
passwd0=claripy.BVS("passwd0",32)  #这里创建了向量
passwd1=claripy.BVS("passwd1",32) #
initial_state.regs.ebp=initial_state.regs.esp
initial_state.regs.esp-=0x8 #scanf
initial_state.stack_push(passwd0)
initial_state.stack_push(passwd1)

第一步先把寄存器esp给了ebp,然后让esp-0x8,这里减0x8的原因,我推测有两个原因,一是联系上下文、二是开辟栈空间

然后把我们创建的向量放到stack上即可,下面代码还是老一套,solver.eval(向量状态)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import angr
import sys
import claripy



def solver():
binary="./04_angr_symbolic_stack"
proj=angr.Project(binary)

start_address=0x8048697
initial_state=proj.factory.blank_state(addr=start_address)

passwd0=claripy.BVS("passwd0",32)
passwd1=claripy.BVS("passwd1",32)
initial_state.regs.ebp=initial_state.regs.esp
initial_state.regs.esp-=0x8 #scanf
initial_state.stack_push(passwd0)
initial_state.stack_push(passwd1)

#initial_state.regs.esp-=12 #.text:08048682 8D 45 F0 lea eax, [ebp+var_10]the relative position of esp when return from scanf()
simgr=proj.factory.simgr(initial_state)
simgr.explore(find=0x80486E1,avoid=0x80486CF)

if simgr.found:
solution_state=simgr.found[0]
solution0=solution_state.solver.eval(passwd0)
solution1=solution_state.solver.eval(passwd1)

print("passwd0:{}".format(hex(solution0)))
print("passwd1:{}".format(hex(solution1)))

else:
raise Exception("Colindd not !!!!")



if __name__=='__main__':
solver()

六、05 me’mory

这道题是对memory内存进行符号化

我们看下源代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
int __cdecl main(int argc, const char **argv, const char **envp)
{
int i; // [esp+Ch] [ebp-Ch]

memset(user_input, 0, 0x21u);
printf("Enter the password: ");
__isoc99_scanf("%8s %8s %8s %8s", user_input, &unk_A1BA1C8, &unk_A1BA1D0, &unk_A1BA1D8);
for ( i = 0; i <= 31; ++i )
*(_BYTE *)(i + 0xA1BA1C0) = complex_function(*(char *)(i + 0xA1BA1C0), i);
if ( !strncmp(user_input, "NJPURZPCDYEAXCSJZJMPSOMBFDDLHBVN", 0x20u) )
puts("Good Job.");
else
puts("Try again.");
return 0;
}

通过以上代码,我们发现是往内存bss段里读入4个8bytes,所以这里我们利用一个为内存赋值的语句

init_state.memory.store(address,passwd0) 这个是把passwd0赋值给address这个地址的里

然后我们创建4个变量并且把这个四个变量放到读入的内存里

1
2
3
4
5
address=0xA1BA1C0
init_state.memory.store(address,passwd0) #放到
init_state.memory.store(address+0x8,passwd1)
init_state.memory.store(address+0x10,passwd2)
init_state.memory.store(address+0x18,passwd3)

这里注意字节的增长

然后下面就是按照之前的方法获取相应的状态值即可

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import angr
import sys
import claripy


def solver():
binary="./05_angr_symbolic_memory"
proj=angr.Project(binary)
start_address=0x8048601
init_state=proj.factory.blank_state(addr=start_address)

passwd0=claripy.BVS("passwd0",64)
passwd1=claripy.BVS("passwd1",64)
passwd2=claripy.BVS("passwd2",64)
passwd3=claripy.BVS("passwd3",64)
address=0xA1BA1C0
init_state.memory.store(address,passwd0)
init_state.memory.store(address+0x8,passwd1)
init_state.memory.store(address+0x10,passwd2)
init_state.memory.store(address+0x18,passwd3)

simgr=proj.factory.simgr(init_state)
simgr.explore(find=0x804866A,avoid=0x8048658)

if simgr.found:
solution_state=simgr.found[0]
solution0=solution_state.solver.eval(passwd0,cast_to=bytes)
solution1=solution_state.solver.eval(passwd1,cast_to=bytes)
solution2=solution_state.solver.eval(passwd2,cast_to=bytes)
solution3=solution_state.solver.eval(passwd3,cast_to=bytes)

print("passwd0:{}".format(solution0))
print("passwd1:{}".format(solution1))
print("passwd2:{}".format(solution2))
print("passwd3:{}".format(solution3))

raise Exception("couln not !!!!!!!")

if __name__=='__main__':
solver()

七、06 动态memory

这里是动态的chunk,malloc出来的,这里我们在动态里进行伪造fakechunk,然后再往这个伪造的chunk进行放入向量即可

先看下源代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
int __cdecl main(int argc, const char **argv, const char **envp)
{
char *v3; // ebx
char *v4; // ebx
int v6; // [esp-10h] [ebp-1Ch]
int i; // [esp+0h] [ebp-Ch]

buffer0 = (char *)malloc(9u);
buffer1 = (char *)malloc(9u);
memset(buffer0, 0, 9u);
memset(buffer1, 0, 9u);
printf("Enter the password: ");
__isoc99_scanf("%8s %8s", buffer0, buffer1, v6);
for ( i = 0; i <= 7; ++i )
{
v3 = &buffer0[i];
*v3 = complex_function(buffer0[i], i);
v4 = &buffer1[i];
*v4 = complex_function(buffer1[i], i + 32);
}
if ( !strncmp(buffer0, "UODXLZBI", 8u) && !strncmp(buffer1, "UAORRAYF", 8u) )
puts("Good Job.");
else
puts("Try again.");
free(buffer0);
free(buffer1);
return 0;
}

获取buffer0和buffer1的地址对这个地址进行fake即可

以下是我伪造的代码:

1
2
3
4
5
6
7
8
9
10
11
12
buf0=0xABCC8A4
buf1=0xABCC8AC
fake_buf0=0xAAAAA10
fake_buf1=0xAAAAA20
p1=claripy.BVS("p1",64)
p2=claripy.BVS("p2",64)

init_state.memory.store(buf0,fake_buf0,endness=proj.arch.memory_endness)
init_state.memory.store(buf1,fake_buf1,endness=proj.arch.memory_endness)

init_state.memory.store(fake_buf0,p1)
init_state.memory.store(fake_buf1,p2)

然后下一步一把梭获取状态值即可

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import angr
import sys
import claripy

def find_path(state):
return b'Good Job.' in state.posix.dumps(sys.stdout.fileno())

def avoid_path(state):
return b'Try again.' in state.posix.dumps(sys.stdout.fileno())



def solver():
binary="./06_angr_symbolic_dynamic_memory"

proj=angr.Project(binary)

start_address=0x8048699

init_state=proj.factory.blank_state(addr=start_address)

buf0=0xABCC8A4
buf1=0xABCC8AC
fake_buf0=0xAAAAA10
fake_buf1=0xAAAAA20
p1=claripy.BVS("p1",64)
p2=claripy.BVS("p2",64)

init_state.memory.store(buf0,fake_buf0,endness=proj.arch.memory_endness)
init_state.memory.store(buf1,fake_buf1,endness=proj.arch.memory_endness)

init_state.memory.store(fake_buf0,p1)
init_state.memory.store(fake_buf1,p2)

simgr=proj.factory.simgr(init_state)
simgr.explore(find=find_path,avoid=avoid_path)
if simgr.found:
sol_state=simgr.found[0]
p1=sol_state.solver.eval(p1,cast_to=bytes)
p2=sol_state.solver.eval(p2,cast_to=bytes)

print("p1:{}".format(p1))
print("p2:{}".format(p2))


else:
raise Exception("Colud not!!!!!!")


if __name__=='__main__':
solver()

八、07 符号fiile文件

这个题是对file文件进行符号化

我们看下题目代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
int __cdecl __noreturn main(int argc, const char **argv, const char **envp)
{
int i; // [esp+Ch] [ebp-Ch]

memset(buffer, 0, sizeof(buffer));
printf("Enter the password: ");
__isoc99_scanf("%64s", buffer);
ignore_me((int)buffer, 0x40u);
memset(buffer, 0, sizeof(buffer));
fp = fopen("OJKSQYDP.txt", "rb");
fread(buffer, 1u, 0x40u, fp);
fclose(fp);
unlink("OJKSQYDP.txt");
for ( i = 0; i <= 7; ++i )
*(_BYTE *)(i + 0x804A0A0) = complex_function(*(char *)(i + 0x804A0A0), i);
if ( strncmp(buffer, "AQWLCTXB", 9u) )
{
puts("Try again.");
exit(1);
}
puts("Good Job.");
exit(0);
}

我们看到,这道打开了一个文件。在这道题的基础上,我们可以使用angr模拟文件从而进行文件内容符号化,我们用到了几个关键语句

1
2
3
4
5
file_size=0x40
filename='OJKSQYDP.txt'
password=claripy.BVS('password',file_size*8)
file_name=angr.storage.SimFile(filename,content=password,size=file_size)
init_state.fs.insert(filename,file_name)

我们利用angr.storage.SimFile模拟文件,第一个参数为文件名,第二个为我们的向量,第三个为题目本身的读入的size,然后模拟完后,我们再对文件系统进行载入init_state.fs.insert(filename,file_name)即可实现文件符号化

然后剩下就是用angr进行模拟,获取状态,获取向量状态值,一把梭

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import angr
import sys
import claripy



def isGood(state):
return b'Good Job.' in state.posix.dumps(1)

def isBad(state):
return b'Try again.' in state.posix.dumps(1)



def solver():
binary="./07_angr_symbolic_file"
proj=angr.Project(binary)

start_address=0x80488EA
init_state=proj.factory.blank_state(addr=start_address)

file_size=0x40
filename='OJKSQYDP.txt'
password=claripy.BVS('password',file_size*8)
file_name=angr.storage.SimFile(filename,content=password,size=file_size)
init_state.fs.insert(filename,file_name)

simgr=proj.factory.simgr(init_state)
simgr.explore(find=isGood,avoid=isBad)
if simgr.found:
sol_state=simgr.found[0]

passwd=sol_state.solver.eval(password,cast_to=bytes)

print("passwd:{}".format(passwd))


else:
raise Exception("Colund not!!!!!!")



if __name__=="__main__":
solver()

未完持续

 Comments