第八周复杂数据类型的机器级表示

第1讲数组和指针类型的分配和访问
第2讲结构和联合数据类型的分配和访问
- 1.结构类型的分配和访问（20分钟）
- 2.联合类型的分配和访问（18分钟）
第3讲数据的对齐存放
- 1.数据的对齐方式（14分钟）
- 2.数据对齐方式举例（14分钟）
第4讲越界访问和缓冲区溢出攻击
- 越界访问和缓冲区溢出攻击（27分钟）
第八周小测验

第1讲数组和指针类型的分配和访问

1.数组的分配与访问（19分钟）

注：这里如果使用mov指令，则会把ebx寄存器中内容10传送到edx寄存器。

2.数组与指针的关系（9分钟）

movl 传送内容，leal 传送地址

其中第5个不好理解，测试程序及结果如下：

#include <stdio.h>
/*
 * Prints the result of subtracting the array base from &A[7].
 * Pointer subtraction yields a distance measured in elements, so the
 * program prints 7 rather than a byte count.
 */
int main()
{
    int A[10] = {1,2,3,4,5,6,7,8,9,10};
    /* The difference of two pointers has type ptrdiff_t; %td is the
       matching conversion (%d expects an int). */
    printf("%td",&A[7]-A);
    return 0;
}

第5条也可以用leal指令实现。

3.指针数组和多维数组（19分钟）

#include <stdio.h>
/*
 * Sums each row of a 2x4 short matrix by walking a per-row pointer taken
 * from a pointer array, and prints one total per row.
 */
int main()
{
    static short num[][4] = {{2,9,-1,5},{3,8,2,-6}};
    static short *pn[2] = {num[0],num[1]};
    static short s[2] = {0,0};
    int row = 0;

    while (row < 2) {
        int remaining = 4;

        /* Read through pn[row], then advance it to the next element. */
        while (remaining-- > 0) {
            s[row] += *pn[row];
            pn[row]++;
        }
        printf("sum of line %d is %d\n", row, s[row]);
        row++;
    }
    return 0;
}

sum of line 0 is 15
sum of line 1 is 7

#include <stdio.h>

/*
 * Sums each row of a 2x4 static short matrix and prints the address of
 * every element visited, so the row-major placement of the elements can be
 * observed.
 */
int main()
{
    static short num[][4] = {{2,9,-1,5},{3,8,2,-6}};
    static short s[2] = {0,0};
    int i,j;
    for(i=0;i<2;i++)
    {
        for(j=0;j<4;j++)
        {
            s[i] += num[i][j];
            /* %p with a void * argument is the only portable way to print
               a pointer; %x expects an unsigned int and loses the upper
               half of a 64-bit address. The exact text produced by %p is
               implementation-defined. */
            printf("%p\n",(void *)&num[i][j]);
        }
        printf("sum of line %d is %d\n",i,s[i]);
    }
    return 0;
}

0x403010
0x403012
0x403014
0x403016
sum of line 0 is 15
0x403018
0x40301a
0x40301c
0x40301e
sum of line 1 is 7

静态区地址是按代码顺序从低地址向高地址增长的，这和栈区不同！！

第2讲结构和联合数据类型的分配和访问

1.结构类型的分配和访问（20分钟）

#include <stdio.h>


/*
 * Prints the addresses of two local ints and of every member of a local
 * struct cont_info, first as pointers and then as decimal integers, so the
 * relative placement of the objects can be compared.
 */
int main()
{
    int m = 0;
    struct cont_info
    {
        char id[8];
        char name [12];
        unsigned post;
        char address[100];
        char phone[20];
    };
    struct cont_info x = {"0000000","ZhangS",210022,"273 long street,High Building#3015","12345678"};
    int n = 1;

    /* Pointers must be printed with %p and a void * argument; %x and %d
       expect int-sized values and truncate 64-bit addresses. */
    printf("%p\n",(void *)&m);
    printf("%p\n",(void *)&(x.id));
    printf("%p\n",(void *)&(x.name));
    printf("%p\n",(void *)&(x.post));
    printf("%p\n",(void *)&(x.address));
    printf("%p\n",(void *)&(x.phone));
    printf("%p\n",(void *)&n);

    /* Decimal view of the same addresses. size_t (declared by <stdio.h>)
       is assumed to be as wide as a pointer on the target platform;
       uintptr_t from <stdint.h> is the stricter choice if available. */
    printf("%zu\n",(size_t)&m);
    printf("%zu\n",(size_t)&(x.id));
    printf("%zu\n",(size_t)&(x.name));
    printf("%zu\n",(size_t)&(x.post));
    printf("%zu\n",(size_t)&(x.address));
    printf("%zu\n",(size_t)&(x.phone));
    printf("%zu\n",(size_t)&n);

    return 0;
}

0x62fe0c
0x62fd70
0x62fd78
0x62fd84
0x62fd88
0x62fdec
0x62fd6c
6487564
6487408
6487416
6487428
6487432
6487532
6487404

这个程序中结构体变量和m、n存储位置相同，都在栈区。

#include <stdio.h>

/*
 * Contact record used to observe how struct members are placed in memory.
 * Members are stored in declaration order; the member order is part of the
 * demonstration and must not be changed.
 */
struct cont_info
{
    char id[8];         /* 8-byte character field */
    char name [12];     /* 12-byte character field */
    unsigned post;      /* numeric field between the character arrays */
    char address[100];  /* 100-byte character field */
    char phone[20];     /* 20-byte character field */
};

/* File-scope instance: unlike a local variable it has static storage
   duration, which is what this program contrasts with stack allocation. */
struct cont_info x = {"0000000","ZhangS",210022,"273 long street,High Building#3015","12345678"};

/*
 * Prints the address of every member of the file-scope struct x, first as
 * pointers and then as decimal integers.
 */
int main()
{
    /* Pointers must be printed with %p and a void * argument; %x and %d
       expect int-sized values and truncate 64-bit addresses. */
    printf("%p\n",(void *)&(x.id));
    printf("%p\n",(void *)&(x.name));
    printf("%p\n",(void *)&(x.post));
    printf("%p\n",(void *)&(x.address));
    printf("%p\n",(void *)&(x.phone));

    /* Decimal view of the same addresses; size_t is assumed to be as wide
       as a pointer on the target platform. */
    printf("%zu\n",(size_t)&(x.id));
    printf("%zu\n",(size_t)&(x.name));
    printf("%zu\n",(size_t)&(x.post));
    printf("%zu\n",(size_t)&(x.address));
    printf("%zu\n",(size_t)&(x.phone));
    return 0;
}

这个程序中的结构体变量x是全局变量，存放在静态区，与上一个程序中的m、n（存放在栈区）存储位置不同。

2.联合类型的分配和访问（18分钟）

#include <stdio.h>
/*
 * Prints the size of a union whose members are char, short, int and long.
 * All members share the same storage, so the union is as large as its
 * largest member (the value depends on how wide long is on the platform).
 */
int main()
{
    union uarea
    {
        char c_data;
        short s_data;
        int i_data;
        long l_data;
    }u;
    /* sizeof yields a size_t, which must be printed with %zu, not %d. */
    printf("The size of uarea is %zu\n",sizeof(u));
    return 0;
}

The size of uarea is 4

#include <stdio.h>
/*
 * Returns the storage of f read back as an unsigned integer.
 * The value is written through the float member of a union and read
 * through the unsigned member, so the bits are reinterpreted rather than
 * numerically converted.
 */
unsigned float2unsign(float f)
{
    union
    {
        float as_float;
        unsigned as_bits;
    } pun = { .as_float = f };

    return pun.as_bits;
}
/* Prints the bit pattern of 10.0 (as a float) interpreted as an unsigned. */
int main()
{
    /* float2unsign returns unsigned, so the matching conversion is %u. */
    printf("%u",float2unsign(10.0));
    return 0;
}

1092616192

第3讲数据的对齐存放

1.数据的对齐方式（14分钟）

#include <stdio.h>
/*
 * Prints the address of each member of a struct declared in the order
 * short, int, char, double, so the padding inserted for alignment can be
 * read off the address differences.
 */
int main()
{
    struct cont_info
    {
        short si;
        int i;
        char c;
        double f;
    };
    struct cont_info x = {100,9,'A',10.0};

    /* Pointers must be printed with %p and a void * argument; %x and %d
       expect int-sized values and truncate 64-bit addresses. */
    printf("%p\n",(void *)&(x.si));
    printf("%p\n",(void *)&(x.i));
    printf("%p\n",(void *)&(x.c));
    printf("%p\n",(void *)&(x.f));

    /* Decimal view, convenient for checking divisibility by the member
       size; size_t is assumed to be as wide as a pointer here. */
    printf("%zu\n",(size_t)&(x.si));
    printf("%zu\n",(size_t)&(x.i));
    printf("%zu\n",(size_t)&(x.c));
    printf("%zu\n",(size_t)&(x.f));

    return 0;
}

6487568/8=810946（整除，余数为0）

Windows系统中double类型确实是按8个字节对齐的，即地址是8的倍数！

这种写法太浪费空间了！！

#include <stdio.h>
/*
 * Prints the size of one struct and of a 10-element array of it, then the
 * address of each member, for a struct declared in the order
 * int, short, double, char.
 */
int main()
{
    struct cont_info
    {
        int i;
        short si;
        double f;
        char c;
    }sa[10];
    struct cont_info x = {100,9,10.0,'A'};

    /* sizeof yields a size_t, which must be printed with %zu. */
    printf("The size of x is %zu\n",sizeof(x));
    printf("The size of sa is %zu\n",sizeof(sa));

    /* Pointers must be printed with %p and a void * argument. */
    printf("%p\n",(void *)&(x.i));
    printf("%p\n",(void *)&(x.si));
    printf("%p\n",(void *)&(x.f));
    printf("%p\n",(void *)&(x.c));

    /* Decimal view of the same addresses; size_t is assumed to be as wide
       as a pointer on the target platform. */
    printf("%zu\n",(size_t)&(x.i));
    printf("%zu\n",(size_t)&(x.si));
    printf("%zu\n",(size_t)&(x.f));
    printf("%zu\n",(size_t)&(x.c));

    return 0;
}

The size of x is 24
The size of sa is 240
0x62fd10
0x62fd14
0x62fd18
0x62fd20
6487312
6487316
6487320
6487328

#include <stdio.h>
/*
 * Prints the address of each member of a struct declared in the order
 * int, char, short, double.
 */
int main()
{
    struct cont_info
    {
        int i;
        char c;
        short si;
        double f;
    };
    struct cont_info x = {100,'A',9,10.0};

    /* Pointers must be printed with %p and a void * argument; %x and %d
       expect int-sized values and truncate 64-bit addresses. */
    printf("%p\n",(void *)&(x.i));
    printf("%p\n",(void *)&(x.c));
    printf("%p\n",(void *)&(x.si));
    printf("%p\n",(void *)&(x.f));

    /* Decimal view of the same addresses; size_t is assumed to be as wide
       as a pointer on the target platform. */
    printf("%zu\n",(size_t)&(x.i));
    printf("%zu\n",(size_t)&(x.c));
    printf("%zu\n",(size_t)&(x.si));
    printf("%zu\n",(size_t)&(x.f));

    return 0;
}

#include <stdio.h>
/*
 * Prints the address of each member of a struct declared in the order
 * int, short, char, double.
 */
int main()
{
    struct cont_info
    {
        int i;
        short si;
        char c;
        double f;
    };
    struct cont_info x = {100,9,'A',10.0};

    /* Pointers must be printed with %p and a void * argument; %x and %d
       expect int-sized values and truncate 64-bit addresses. */
    printf("%p\n",(void *)&(x.i));
    printf("%p\n",(void *)&(x.si));
    printf("%p\n",(void *)&(x.c));
    printf("%p\n",(void *)&(x.f));

    /* Decimal view of the same addresses; size_t is assumed to be as wide
       as a pointer on the target platform. */
    printf("%zu\n",(size_t)&(x.i));
    printf("%zu\n",(size_t)&(x.si));
    printf("%zu\n",(size_t)&(x.c));
    printf("%zu\n",(size_t)&(x.f));

    return 0;
}

2.数据对齐方式举例（14分钟）

#include <stddef.h>
#include <stdint-gcc.h>
#include <stdio.h>
/*
 * Layout demo combining a packing limit with an over-aligned type.
 * #pragma pack(4) caps member alignment at 4 bytes, which is why the
 * 64-bit member f5 is reported at offset 0xc instead of 0x10 in the
 * recorded run. The aligned(1024) attribute raises the alignment of the
 * whole type, and the recorded sizeof is 1024.
 */
#pragma pack(4)
typedef struct
{
    uint32_t f1;    /* recorded offset 0x0 */
    uint8_t f2;     /* recorded offset 0x4 */
    uint8_t f3;     /* recorded offset 0x5 */
    uint32_t f4;    /* recorded offset 0x8 */
    uint64_t f5;    /* recorded offset 0xc */
}__attribute__((aligned(1024))) ts;

int main()
{
    printf("Struct size is: %d, aligned on 1024\n",sizeof(ts));
    printf("Allocate f1 on address:0x%x\n",&(((ts*)0)->f1));
    printf("Allocate f2 on address:0x%x\n",&(((ts*)0)->f2));
    printf("Allocate f3 on address:0x%x\n",&(((ts*)0)->f3));
    printf("Allocate f4 on address:0x%x\n",&(((ts*)0)->f4));
    printf("Allocate f5 on address:0x%x\n",&(((ts*)0)->f5));
    return 0;
}

Struct size is: 1024, aligned on 1024
Allocate f1 on address:0x0
Allocate f2 on address:0x4
Allocate f3 on address:0x5
Allocate f4 on address:0x8
Allocate f5 on address:0xc

#include <stdio.h>
//#pragma pack(1)

/*
 * Variant carrying the GCC packed attribute, which requests that members
 * be laid out without alignment padding. The notes record sizeof as 15
 * under WSL2/Ubuntu and 24 under Windows 10 for this definition.
 */
struct test
{
    char x2;
    int x1;
    short x3;
    long long x4;
}__attribute__((packed));

/*
 * Same members with no attribute, i.e. the compiler's default alignment.
 * The recorded sizeof is 24 on both platforms tried in the notes.
 */
struct test1
{
    char x2;
    int x1;
    short x3;
    long long x4;
};

/*
 * Same members with the GCC aligned(8) attribute on the type. The recorded
 * sizeof is 24 on both platforms tried in the notes.
 */
struct test2
{
    char x2;
    int x1;
    short x3;
    long long x4;
}__attribute__((aligned(8)));

/* Prints the size of the packed, default and aligned(8) struct variants. */
int main()
{
    /* sizeof yields a size_t, which must be printed with %zu, not %d. */
    printf("size=%zu\n",sizeof(struct test));
    printf("size=%zu\n",sizeof(struct test1));
    printf("size=%zu\n",sizeof(struct test2));
    return 0;
}

在Windows10（按8字节对齐）下执行结果：

size=24
size=24
size=24

紧凑方式没起作用。

在WSL2（Ubuntu22.04LTS）上执行结果如下：

size=15
size=24
size=24

中间这个按自然边界对齐的方式所占用的字节数和课程中讲到的不同。

在Ubuntu系统上这个程序的执行结果也和上面一样。

#include <stdio.h>
/*
 * From here on #pragma pack(1) is in effect, limiting member alignment to
 * one byte for every struct that follows in this program.
 */
#pragma pack(1)

/* Variant that additionally carries the GCC packed attribute; the recorded
   sizeof for this program is 15. */
struct test
{
    char x2;
    int x1;
    short x3;
    long long x4;
}__attribute__((packed));

/*
 * Same members with no attribute; only the pack(1) pragma of this program
 * applies. The recorded sizeof for this program is 15.
 */
struct test1
{
    char x2;
    int x1;
    short x3;
    long long x4;
};

/*
 * Same members with aligned(8) on the type, under the pack(1) pragma of
 * this program. The recorded sizeof is 16, i.e. the 15 bytes of members
 * rounded up to a multiple of the 8-byte type alignment.
 */
struct test2
{
    char x2;
    int x1;
    short x3;
    long long x4;
}__attribute__((aligned(8)));

/* Prints the size of the three struct variants declared under pack(1). */
int main()
{
    /* sizeof yields a size_t, which must be printed with %zu, not %d. */
    printf("size=%zu\n",sizeof(struct test));
    printf("size=%zu\n",sizeof(struct test1));
    printf("size=%zu\n",sizeof(struct test2));
    return 0;
}

size=15
size=15
size=16

#include <stdio.h>
/*
 * From here on #pragma pack(1) is in effect, limiting member alignment to
 * one byte for every struct that follows in this program.
 */
#pragma pack(1)

/* Variant that additionally carries the GCC packed attribute; the recorded
   sizeof for this run is 15. */
struct test
{
    char x2;
    int x1;
    short x3;
    long long x4;
}__attribute__((packed));

/*
 * Same members with no attribute; only the pack(1) pragma of this program
 * applies. The recorded sizeof for this run is 16.
 * NOTE(review): the notes do not say which platform produced this run.
 */
struct test1
{
    char x2;
    int x1;
    short x3;
    long long x4;
};

/*
 * Same members with aligned(8) on the type, under the pack(1) pragma of
 * this program. The recorded sizeof for this run is 16.
 */
struct test2
{
    char x2;
    int x1;
    short x3;
    long long x4;
}__attribute__((aligned(8)));

/* Prints the size of the three struct variants declared under pack(1). */
int main()
{
    /* sizeof yields a size_t, which must be printed with %zu, not %d. */
    printf("size=%zu\n",sizeof(struct test));
    printf("size=%zu\n",sizeof(struct test1));
    printf("size=%zu\n",sizeof(struct test2));
    return 0;
}

size=15
size=16
size=16

第4讲越界访问和缓冲区溢出攻击

越界访问和缓冲区溢出攻击（27分钟）

test.c文件内容：

#include <stdio.h>
#include <string.h>

/*
 * Copies str into a 16-byte local buffer and prints it.
 *
 * NOTE: the copy is intentionally unbounded. strcpy does not check the
 * destination size, so any str of 16 or more characters writes past the
 * end of buffer. That overflow is the subject of this lecture example;
 * do not reuse this function in real code.
 */
void outputs(char *str)
{
    char buffer[16];
    strcpy(buffer,str);     /* no length check: overflows for long input */
    printf("%s\n",buffer);
}

/*
 * Writes a fixed marker line to standard output. Nothing in this file
 * calls it directly.
 */
void cracker(void)
{
    fputs("Being cracked\n", stdout);
}
/*
 * Passes the first command-line argument to outputs().
 * Returns 0 on success and 1 when no argument was supplied.
 */
int main(int argc,char *argv[])
{
    /* Without an argument argv[1] is NULL, and outputs() would hand that
       null pointer to strcpy. */
    if (argc < 2)
    {
        fprintf(stderr,"usage: %s <string>\n",argc > 0 ? argv[0] : "test");
        return 1;
    }
    outputs(argv[1]);
    return 0;
}

main.c文件内容：

#include <stdio.h>
#define _GNU_SOURCE
#include <unistd.h>
/*
 * Argument string handed to ./test.
 * Layout: 16 printable bytes, 4 filler bytes, then the bytes 11 84 04 08
 * (0x08048411 when read as a little-endian 32-bit value), then an explicit
 * NUL in addition to the literal's own terminator.
 * NOTE(review): presumably the first 16 bytes fill buffer[16] in test.c and
 * the trailing value is meant to be the address of cracker(); the notes
 * themselves say that address was not confirmed.
 */
char code[] =
    "0123456789ABCDEF"
    "XXXX"
    "\x11\x84\x04\x08"
    "\x00";

/*
 * Replaces this process with ./test, passing code as its only argument.
 * Returns 1 if the exec fails; on success execve does not return.
 */
int main()
{
    char *argv[3];
    argv[0]="./test";
    argv[1]=code;
    argv[2]=NULL;
    execve(argv[0],argv,NULL);

    /* Reaching this point means execve failed (for example ./test is
       missing or not executable); report it instead of exiting with 0. */
    perror("execve");
    return 1;
}

在WSL2Ubuntu上运行结果如下：

xxxx@DESKTOP-xxxx:~/csapp$ ./test1
Segmentation fault
xxxx@DESKTOP-xxxx:~/csapp$ ./hello
0123456789ABCDEFXXXX

这里应该有保护机制，阻止了缓冲区溢出的攻击，直接就输出了Segmentation fault，没能输出"Being cracked"。

当然也可能是没能提供cracker()函数的真正地址。

第八周小测验

1求`a[2]`地址

假定全局short型数组a的起始地址为0x804908c，则a[2]的地址是（ C ）。

A.0x8049094

B.0x8049092

C.0x8049090

D.0x804908e

解释：short型占2个字节，&a[0]=0x804908c，所以&a[2]=0x804908c+2×2=0x8049090

2传值汇编指令

假定全局数组a的声明为char *a[8]，a的首地址为0x80498c0，i 在ECX中，现要将a[i]取到EAX相应宽度的寄存器中，则所用的汇编指令是（ B ）。

A.mov 0x80498c0( , %ecx), %ah

B.mov 0x80498c0( , %ecx, 4), %eax

C.mov (0x80498c0, %ecx), %ah

D.mov (0x80498c0, %ecx, 4), %eax

解释：课程使用IA-32系统，指针型变量的大小为4Byte，%eax为目的寄存器

3传值汇编指令

假定全局数组a的声明为double *a[8]，a的首地址为0x80498c0，i 在ECX中，现要将a[i]取到EAX相应宽度的寄存器中，则所用的汇编指令是（D ）。

A.mov (0x80498c0, %ecx, 8), %eax

B.mov (0x80498c0, %ecx, 4), %eax

C.mov 0x80498c0( , %ecx, 8), %eax

D.mov 0x80498c0( , %ecx, 4), %eax

解释：IA-32系统不管是什么类型的指针，大小都为4byte

4传送首地址汇编指令

假定局部数组a的声明为int a[4]={0, -1, 300, 20}，a的首地址为R[ebp]-16，则将a的首地址取到EDX的汇编指令是（ B ）。

A.movl -16(%ebp ), %edx

B.leal -16(%ebp), %edx

C.movl -16(%ebp, 4), %edx

D.leal -16(%ebp, 4), %edx

解释：因为是取地址，所以用到加载有效地址leal，又因为是取首地址，无需变址和比例因子。

5详解`ptr+i`

某C语言程序中有以下两个变量声明：

int  a[10];

int  *ptr=&a[0];

则ptr+i的值为（ B ）。

A.&a[0]+8×i

B.&a[0]+4×i

C.&a[0]+2×i

D.&a[0]+i

解释：ptr是int型指针，指针加整数i时按所指向类型的大小进行缩放；IA-32中int占4B，所以ptr+i的值为&a[0]+4×i。

6二维数组元素地址

假定静态short型二维数组b的声明如下：

static short b[2][4]={ {2, 9, -1, 5}, {3, 8, 2, -6}};

若b的首地址为0x8049820，则按行优先存储方式下，数组元素“8”的地址是（ D ）。

A.0x8049824

B.0x8049828

C.0x8049825

D.0x804982a

解释：8前面有5个元素，大小为5*2=10=0xa，所以地址为：0x8049820+0xa=0x804982a

7指针数组元素的值

假定静态short型二维数组b和指针数组pb的声明如下：

static short b[2][4]={ {2, 9, -1, 5}, {3, 1, -6, 2 }};

static short *pb[2]={b[0], b[1]};

若b的首地址为0x8049820，则pb[1]的值是（ C ）。

A.0x8049824

B.0x8049820

C.0x8049828

D.0x8049822

解释：这里问的是pb[1]这个数组元素的值，显然应该是b[1]，而b[1]应该是指二维数组b的第1行（从0开始）的起始地址。b每行有4个元素，每个元素占两个字节（short型），因而每行占8个字节，因而b的第1行首地址为0x8049820+8=0x8049828。

8指针数组元素的地址

假定静态short型二维数组b和指针数组pb的声明如下：

static short b[2][4]={ {2, 9, -1, 5}, {3, 1, -6, 2 }};

static short *pb[2]={b[0], b[1]};

若b的首地址为0x8049820，则&pb[1]的值是（ D ）。

A.0x8049830

B.0x8049832

C.0x8049838

D.0x8049834

解释：这里问的是pb[1]这个数组元素的地址，通常pb数组直接在b数组后面分配，因为b数组占2x8=16个单元，因此pb数组的首地址为0x8049820+16=0x8049830。而pb数组的每个元素是一个指针，故占4B，所以pb[1]的地址为0x8049830+4=0x8049834。

9结构体元素赋值汇编指令

假定结构体类型cont_info的声明如下：

struct cont_info {

char id[8];

char name [16];

unsigned post;

char address[100];

char phone[20];

} ;

若结构体变量x初始化定义为struct cont_info x={“00000010”, “ZhangS”, 210022, “273 long street, High Building #3015”, “12345678”}，x的首地址在EDX中，则“unsigned xpost=x.post;”对应汇编指令为（ C ）。

A.leal 0x24(%edx), %eax

B.movl 0x24(%edx), %eax

C.movl 0x18(%edx), %eax

D.leal 0x18(%edx), %eax

解释：post前面有id（8字节）和name（16字节），偏移量为8+16=24=0x18；且是把x.post的值赋给xpost，需要读取内存内容，所以用movl。

10对齐方式

以下是关于IA-32处理器对齐方式的叙述，其中错误的是（ D ）。

A.可以用编译指导语句（如#pragma pack）设置对齐方式

B.不同操作系统采用的对齐策略可能不同

C.对于同一个struct型变量，在不同对齐方式下可能会占用不同大小的存储区

D.总是按其数据宽度进行对齐，例如，double型变量的地址总是8的倍数

计算机系统基础