explorer

万丈高楼平地起,勿在浮沙筑高台

0%

[What]Linux文件系统基本操作

参考宋宝华老师的书 Linux设备驱动开发详解 ,来整理文件系统操作。

编程接口

此节只列出Linux和c库操作接口,具体的详细信息还是要找 man

  • 其实在实际应用中,还是尽量使用标准的 c/c++ 库便于以后移植。

需要注意的是:标准的ISOC库的I/O操作默认是带有缓存的,也就是填充一定的缓存后才会去调用系统接口。 而如果直接使用POSIX标准的系统接口,相当于上层没有做缓存,但实际上 内核为了尽量批量化的操作I/O,其内部也会做缓存。

常用接口

文件的创建

  • linux
1
2
3
4
5
6
7
8
9
10
11
12
/**
 * @brief Set the permission bits that are stripped from newly created files.
 * @param newmask New file-mode creation mask for the calling process.
 * @return The mask that was in effect before the call.
 * @note This only changes the umask of the calling process; it does not
 *       affect the shell's default umask. The shell command `umask` prints
 *       the shell's own setting, whose digits are, in order:
 *       <special bits><user><group><other>.
 */
mode_t umask(mode_t newmask);
/**
* @brief 创建文件并设置权限
* @note 最终权限为 mode & ~umask,即从 mode 中去掉 umask 指定的权限位
* - 此函数使用较少,一般使用 open(path, O_RDWR | O_CREAT | O_TRUNC, mode);
*/

int creat(const char *pathname, mode_t mode);
mode value 含义
S_IRUSR 用户可读
S_IWUSR 用户可写
S_IXUSR 用户可执行
S_IRWXU 用户可读、写、执行
S_IRGRP 组可读
S_IWGRP 组可写
S_IXGRP 组可执行
S_IRWXG 组可读、写、执行
S_IROTH 其他人可读
S_IWOTH 其他人可写
S_IXOTH 其他人可执行
S_IRWXO 其他人可读、写、执行
S_ISUID 设置用户执行ID
S_ISGID 设置组执行ID
  • c 创建及打开
1
FILE *fopen( const char *filename, const char *mode );

详解: fopen

文件打开

  • linux
1
2
3
4
5
6
7
8
/**
* @brief 以flags指定的方式打开 pathname 指定的文件
* @note
* 1. 当 flags 为 O_CREAT 时,需要指定其 mode
* 2. 当pathname为相对路径时,可以使用 openat 为其指定一个 dirfd,以此dir为相对路径
*/

int open(const char *pathname, int flags, .../*mode_t mode*/);
int openat(int dirfd, const char *pathname, int flags, .../*mode_t mode*/);
flags value 含义
O_RDONLY 只读方式打开
O_WRONLY 只写方式打开
O_RDWR 读写方式打开
O_APPEND 追加方式打开
O_CREAT 创建
O_EXCL 如果使用了O_CREAT且文件存在,就会发生错误
O_NONBLOCK 以非阻塞的方式打开
O_TRUNC 如果文件存在则删除其内容
  • 在POSIX标准中的标准输入、输出、错误对应的宏依次为 STDIN_FILENO,STDOUT_FILENO,STDERR_FILENO 位于头文件 <unistd.h>

文件读写

  • linux 进行read和write大量数据读写时,需要考虑单次读写的字节数,取文件系统的block大小(比如4096字节),能在尽量减小系统调用的同时保证较高的写入效率。
1
2
ssize_t read(int fd, void *buf, size_t count);
ssize_t write(int fd, const void *buf, size_t count);
  • c
1
2
3
4
size_t fread( void          *buffer, size_t size, size_t count,
FILE *stream );
size_t fwrite( const void *buffer, size_t size, size_t count,
FILE *stream );

详解: fread fwrite

文件定位

  • linux
1
2
3
4
5
6
7
/**
* @brief 以 whence 为起始移动 offset 字节
* @note
* 1. 获取当前文件位置使用 currpos = lseek(fd, 0, SEEK_CUR);
* 当返回负数代表此文件对象不能做移动操作,比如FIFO,SOCKET
*/

off_t lseek(int fd, off_t offset, int whence);
whence value 含义
SEEK_SET 文件开头
SEEK_CUR 当前位置
SEEK_END 文件尾
  • c
1
int fseek( FILE *stream, long offset, int origin );

详解: fseek

截断文件

  • linux
1
2
3
4
5
/**
* @brief : 将文件大小设置为恰好 length 字节:原文件更长时超出部分被丢弃,更短时以 '\0' 填充扩展
*/

int truncate(const char *path, off_t length);
int ftruncate(int fd, off_t length);

文件关闭

  • linux
1
int close(int fd);
  • c
1
int fclose( FILE *stream );

详解: fclose

文件夹操作

  • 新建
1
2
int mkdir(const char *pathname, mode_t mode);
int mkdirat(int dirfd, const char *pathname, mode_t mode);
  • 移除
1
int rmdir(const char *pathname);

实例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*!
* ### 文件操作
* 1. 创建
* > int creat(const char *filename, mode_t mode);
* > mode 与 umask 共同决定文件的最终权限 (mode & ~umask)
* > mode_t umask(mode_t newmask);
*
* > FILE *fopen(const char *path, const char *mode);
* > mode --> "r"/"rb"/"w"/"wb"/"a"/"ab"/"r+"/"r+b"/"rb+"/"w+"/"w+b"/"wb+"/"a+"/"a+b"/"ab+"
* 2. 打开
* > int open(const char *pathname, int flags);
* > int open(const char *pathname, int flags, mode_t mode);
* > flag --> O_RDONLY / O_WRONLY / O_RDWR / O_APPEND / O_CREAT / O_EXCL / O_NONBLOCK / O_TRUNC;
* > mode --> S_IRUSR / S_IWUSR/ S_IXUSR / S_IRWXU / S_IRGRP / S_IWGRP / S_IXGRP / S_IRWXG / S_IROTH / S_IWOTH / S_IXOTH / S_IRWXO / S_ISUID / S_ISGID;
*
* 3. 读写
* > ssize_t read(int fd, void *buf, size_t length);
* > ssize_t write(int fd, const void *buf, size_t length);
*
* > int fgetc(FILE *stream);
* > int fputc(int c, FILE *stream);
* > char *fgets(char *s, int n, FILE *stream);
* > int fputs(const char *s, FILE *stream);
* > int fprintf(FILE *stream, const char *format, ...);
* > int fscanf(FILE *stream, const char *format, ...);
* > size_t fread(void *ptr, size_t size, size_t n, FILE *stream);
* > size_t fwrite(const void *ptr, size_t size, size_t n, FILE *stream);
*
* 4. 定位
* > off_t lseek(int fd, off_t offset, int whence);
* > whence --> SEEK_SET / SEEK_CUR / SEEK_END;
* > 得到文件长度 lseek(fd, 0, SEEK_END);
*
* > int fgetpos(FILE *stream, fpos_t *pos);
* > int fsetpos(FILE *stream, const fpos_t *pos);
* > int fseek(FILE *stream, long offset, int whence);
*
* 5. 关闭
* > int close(int fd);
*
* > int fclose(FILE *stream);
*
*/

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>

#define LENGTH (100)

/**
 * Write a greeting to a file and read it back twice: once through the raw
 * descriptor API (open/write/read/close) and once through buffered stdio
 * (fopen/fputs/fgets/fclose). Each string read back is printed to stdout.
 *
 * @param argc Unused.
 * @param argv Unused.
 * @return 0 on success, 1 as soon as any file operation fails.
 */
int main(int argc, char *argv[])
{
	static const char greeting[] = "Hello world";
	const size_t greeting_len = strlen(greeting);
	char str[LENGTH];
	ssize_t len;
	FILE *p_fd;
	int fd;

	(void)argc;
	(void)argv;

	/* open() signals failure with -1; descriptor 0 is perfectly valid. */
	fd = open("hello.txt", O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
	if (fd == -1) {
		perror("open hello.txt");
		return 1;
	}
	if (write(fd, greeting, greeting_len) != (ssize_t)greeting_len) {
		fprintf(stderr, "write hello.txt failed\n");
		close(fd);
		return 1;
	}
	close(fd);

	p_fd = fopen("hello_lib.txt", "w+");
	if (p_fd == NULL) {
		perror("fopen hello_lib.txt");
		return 1;
	}
	if (fputs("Hello world! ^_^ \n", p_fd) == EOF) {
		fprintf(stderr, "fputs hello_lib.txt failed\n");
		fclose(p_fd);
		return 1;
	}
	/* fclose() flushes the stdio buffer, so a write error can surface here. */
	if (fclose(p_fd) == EOF) {
		perror("fclose hello_lib.txt");
		return 1;
	}

	fd = open("hello.txt", O_RDWR);
	if (fd == -1) {
		perror("open hello.txt");
		return 1;
	}
	/* Leave one byte free so the terminator below always fits in str. */
	len = read(fd, str, sizeof str - 1);
	if (len == -1) {
		perror("read hello.txt");
		close(fd);
		return 1;
	}
	str[len] = '\0';
	printf("%s\n", str);
	close(fd);

	p_fd = fopen("hello_lib.txt", "r");
	if (p_fd == NULL) {
		perror("fopen hello_lib.txt");
		return 1;
	}
	if (fgets(str, LENGTH, p_fd) == NULL) {
		fprintf(stderr, "fgets hello_lib.txt failed\n");
		fclose(p_fd);
		return 1;
	}
	printf("%s\n", str);
	fclose(p_fd);

	return 0;
}

多个进程打开同一个文件

多个进程打开同一个文件时,每个进程的 task_struct 都会包含此文件的资源描述,但是最终它们都是指向同一个 inode

  • 每个文件资源描述都包含对该文件的操作状态,位置偏移等信息
  • 当进行 lseek 这种操作时,如果没有造成文件的扩大,其实是直接操作的资源描述结构体,而没有去操作inode。

互斥操作

如果有多个进程在操作同一个文件,则很有可能会造成竞态,有以下方式来避免此问题的发生:

  • pread , pwrite
1
2
3
4
5
6
7
8
9
/**
* @brief : 在文件为起始的 offset 字节处开始读或者写
* @note :
* 1. 这两个函数的操作是原子性的
* 2. 此函数并不会改变对应进程本身所保存的 offset
*/

ssize_t pread(int fd, void *buf, size_t count, off_t offset);

ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset);

文件索引的复制

使用以下函数可以完成文件索引的复制(也就是两个不同的索引指向同一个文件描述资源,它们具有联动的偏移位置)

1
2
int dup(int oldfd);
int dup2(int oldfd, int newfd);

主动写回数据到硬盘

一般的文件读写数据都会被存在 page cache 中,待内核在合适的时间写入硬盘,为了强制同步,可以使用下面函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/**
* @brief 以阻塞的方式等待某个文件同步
*/

int fsync(int fd);

/**
* @brief 以阻塞的方式同步文件数据,文件的元数据不一定会同步
* @note : 只有一些重要的修改才会同步元数据,比如文件大小改变了
* 但如果只是文件的访问时间等时间戳改变了,是不会同步元数据的
*/

int fdatasync(int fd);

/**
* @brief 给内核发送同步消息,并不会等待内核操作完成
* @note shell 中的 sync 命令 也是调用的此函数
*/

void sync(void);

文件运行时控制

当一个文件已经打开,要修改它的一些属性时,可以使用函数 fcntl .

1
int fcntl(int fd, int cmd, ... /* arg */ );

此函数具有以下用途:

  1. 生成一个文件描述符的副本
  2. 获取或设置文件描述符标记
  3. 获取或设置文件状态
  4. 获取或设置文件拥有者关系
  5. 获取或设置文件锁

需要注意的是: 当要修改某个文件状态时,应该像操作寄存器位那样通过 读-修改-写 的方式操作(也就是先读取当前设置值,然后写入新设置的那一位,再回写回去)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>

/**
 * Create a scratch file "./test", query its file status flags with
 * fcntl(F_GETFL) and print the access mode plus whichever of
 * O_APPEND / O_NONBLOCK / O_SYNC are set. The scratch file is removed
 * before returning.
 *
 * @return 0 on success, 1 if the file cannot be opened or queried.
 */
int main(void)
{
	int rc = 1;
	int status;
	/*
	 * With O_CREAT the third (mode) argument is mandatory; omitting it
	 * makes open() read an indeterminate value for the permission bits.
	 * 0600 = read/write for the owner only.
	 */
	int fd = open("./test", O_CREAT | O_WRONLY, 0600);

	if (fd == -1) {
		perror("open file failed:");
		goto quick_out;
	}

	status = fcntl(fd, F_GETFL, 0);
	if (status == -1) {
		perror("can not get file status:");
		goto close_out;
	}

	/* The access mode is a small enumerated field, not independent bits. */
	switch (status & O_ACCMODE) {
	case O_RDONLY:
		printf("read only\n");
		break;
	case O_WRONLY:
		printf("write only\n");
		break;
	case O_RDWR:
		printf("read write\n");
		break;
	default:
		printf("can not get file mode!\n");
		break;
	}

	if (status & O_APPEND) {
		printf("append\n");
	}
	if (status & O_NONBLOCK) {
		printf("nonblocking\n");
	}
	if (status & O_SYNC) {
		printf("synchronous writes\n");
	}

	rc = 0;

close_out:
	close(fd);
	remove("./test");
quick_out:
	return rc;
}

另外的一个控制函数便是 ioctl ,这个在驱动的操作中经常使用:

1
int ioctl(int fd, unsigned long request, ...);

文件的权限与属性

获取文件属性

平时使用最多的 shell 命令 ls -al 就是提取的文件属性来显示。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
/*
 * File attributes as filled in by stat(), fstat(), lstat() and fstatat().
 */
struct stat {
dev_t st_dev; /* ID of device containing file */
ino_t st_ino; /* inode number */
mode_t st_mode; /* file type; test it with the S_ISREG()/S_ISDIR()/S_ISLNK()/... macros */
nlink_t st_nlink; /* number of hard links */
uid_t st_uid; /* user ID of owner */
gid_t st_gid; /* group ID of owner */
dev_t st_rdev; /* device ID (if special file) */
off_t st_size; /* total size, in bytes */
blksize_t st_blksize; /* blocksize for filesystem I/O */
blkcnt_t st_blocks; /* number of 512B blocks allocated */

/* Since Linux 2.6, the kernel supports nanosecond
precision for the following timestamp fields.
For the details before Linux 2.6, see NOTES. */


struct timespec st_atim; /* time of last access */
struct timespec st_mtim; /* time of last modification */
struct timespec st_ctim; /* time of last status change */

/* Second-resolution aliases for the timespec fields above. */
#define st_atime st_atim.tv_sec /* Backward compatibility */
#define st_mtime st_mtim.tv_sec
#define st_ctime st_ctim.tv_sec
};

/**
* @brief 获取文件的属性并存储于结构 stat 中
*/

//如果文件是符号链接,那么获取被链接文件的属性
int stat(const char *pathname, struct stat *buf);
//获取已打开文件的属性
int fstat(int fd, struct stat *buf);
//如果文件是符号链接,那么获取该符号链接的属性
int lstat(const char *pathname, struct stat *buf);

//根据用户指定的 dirfd 和提供的路径 pathname 来获取文件属性,
//flags 用于控制是否读取符号链接本身
int fstatat(int dirfd, const char *pathname, struct stat *buf,
int flags)
;


/**
* @brief 也可以使用下面的函数修改时间戳
*/

int futimes(int fd, const struct timeval tv[2]);

int lutimes(const char *filename, const struct timeval tv[2]);

int utimensat(int dirfd, const char *pathname,
const struct timespec times[2], int flags)
;


int futimens(int fd, const struct timespec times[2]);

如下代码所示,使用lstat来判断文件类型:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

/* Map an st_mode value to a human-readable file type name. */
static const char *file_type_name(mode_t mode)
{
	if (S_ISREG(mode)) {
		return "regular file";
	}
	if (S_ISDIR(mode)) {
		return "directory";
	}
	if (S_ISSOCK(mode)) {
		return "socket";
	}
	if (S_ISCHR(mode)) {
		return "character device";
	}
	if (S_ISBLK(mode)) {
		return "block device";
	}
	if (S_ISFIFO(mode)) {
		return "FIFO";
	}
	if (S_ISLNK(mode)) {
		return "symbolic link";
	}
	return "unknown!";
}

/**
 * Print the type of the file named by the single command-line argument.
 * lstat() is used so that a symbolic link is reported as a link rather
 * than as the file it points to.
 *
 * @param argc Must be 2 (program name plus one path).
 * @param argv argv[1] is the path to inspect.
 * @return 0 on success, 1 on a usage error or if lstat() fails.
 */
int main(int argc, char *argv[])
{
	struct stat file_stat = {0};

	if (argc != 2) {
		printf("usage: ./a.out <file_path>\n");
		return 1;
	}
	if (lstat(argv[1], &file_stat) == -1) {
		perror("can not get file status:");
		return 1;
	}

	printf("The file type is : %s\n", file_type_name(file_stat.st_mode));

	return 0;
}

操作文件的权限

与操作文件相关的ID具有下面几类:

类型 说明
真实用户ID和真实组ID 表示当前是哪个用户位于哪个组正在访问此文件
有效用户ID,有效组ID和补充组ID 表示该文件允许的用户和组(在没有suid,sgid的情况下,此值与真实用户和真实组ID是一个值)
suid 当文件user的可执行权限打开并设置了suid后,其他用户可以以该文件所有者的权限来运行此文件
sgid 当文件group的可执行权限打开并设置了sgid后,其他用户可以以该文件组成员的权限来运行此文件
  • 对于权限方面还有一个(sticky bit):当文件other的可执行权限打开并设置了sticky后,用户都可以在此文件夹下新建文件和文件夹(类似于共享文件夹)
    • 但用户不能删除其他用户所新建的文件或文件夹
  • 对于普通权限 rwx 不得不提的是:
    • 要进入基本的目录,至少要具有 x 权限,要读取目录内容列表信息,至少要具有 rx 权限。
    • 对一个文件是否具有新建或删除的权限,要看用户对此目录是否具有 rw 权限。
      • 这与文件自身的权限无关, 自身权限只关联其内容的操作权限

可以使用下面的函数来判断当前进程是否有权限访问某个文件:

1
2
int access(const char *pathname, int mode);
int faccessat(int dirfd, const char *pathname, int mode, int flags);

修改权限

1
2
3
4
5
6
7
8
9
10
//修改文件权限
int chmod(const char *pathname, mode_t mode);
int fchmod(int fd, mode_t mode);
int fchmodat(int dirfd, const char *pathname, mode_t mode, int flags);
//改变用户id和组id
int chown(const char *pathname, uid_t owner, gid_t group);
int fchown(int fd, uid_t owner, gid_t group);
int lchown(const char *pathname, uid_t owner, gid_t group);
int fchownat(int dirfd, const char *pathname,
uid_t owner, gid_t group, int flags)
;

硬链接

每增加一个硬链接,文件的链接数量加1,以表示有多少个文件引用到同一个inode.

1
2
3
int link(const char *oldpath, const char *newpath);
int linkat(int olddirfd, const char *oldpath,
int newdirfd, const char *newpath, int flags)
;

每取消一个硬链接,文件的链接数量减1,当一个文件的链接数量减至0 并且没有进程打开此文件时 ,文件即被删除。

  • 当有进程打开了文件,那么当进程退出或关闭文件时,内核检查引用计数为0,才删除文件。
1
2
3
4
int unlink(const char *pathname);
int unlinkat(int dirfd, const char *pathname, int flags);
//移除文件时,与 unlink 一致,移除文件夹时,与 rmdir 一致
int remove(const char *pathname);

符号链接

  • 创建
1
2
int symlink(const char *target, const char *linkpath);
int symlinkat(const char *target, int newdirfd, const char *linkpath);
  • 读取符号链接本身内容(可以看到其block内容为其引用文件路径)
1
2
3
ssize_t readlink(const char *pathname, char *buf, size_t bufsiz);
ssize_t readlinkat(int dirfd, const char *pathname,
char *buf, size_t bufsiz);

名称

1
2
3
4
5
6
int rename(const char *oldpath, const char *newpath);
int renameat(int olddirfd, const char *oldpath,
int newdirfd, const char *newpath)
;


int renameat2(int olddirfd, const char *oldpath,
int newdirfd, const char *newpath, unsigned int flags)
;

文件系统与设备驱动(include/linux/fs.h)

在设备驱动中,会关心file和inode这两个结构体。

  • 每打开一个文件,在内核空间中就有与之关联的 file 结构体
    • 设备驱动通过此结构体判断用户操作模式(比如是阻塞还是非阻塞等)
      • 判断阻塞还是非阻塞使用 f_flags
    • private_data 保存该设备驱动申请的数据地址
  • inode 则包含了一个文件的详细信息

file

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/*
 * Kernel-side object associated with an open file; per the surrounding
 * notes, one exists for every open, and device drivers consult it to learn
 * how the user opened the file.
 */
struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
struct inode *f_inode; /* cached value */
const struct file_operations *f_op;

/*
* Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/

spinlock_t f_lock;
atomic_long_t f_count;
unsigned int f_flags; /* drivers check this to tell blocking from non-blocking mode */
fmode_t f_mode;
struct mutex f_pos_lock;
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;

u64 f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* needed for tty driver, and maybe others */
/* a device driver stores the address of the data it allocated here */
void *private_data;

#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */

inode

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*
* Holds the detailed information about one file.
*
* Keep mostly read-only and often accessed (especially for
* the RCU path lookup and 'stat' data) fields at the beginning
* of the 'struct inode'
*/

struct inode {
umode_t i_mode;
unsigned short i_opflags;
kuid_t i_uid;
kgid_t i_gid;
unsigned int i_flags;

#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *i_acl;
struct posix_acl *i_default_acl;
#endif

const struct inode_operations *i_op;
struct super_block *i_sb;
struct address_space *i_mapping;

#ifdef CONFIG_SECURITY
void *i_security;
#endif

/* Stat data, not accessed from path walking */
unsigned long i_ino;
/*
* Filesystems may only read i_nlink directly. They shall use the
* following functions for modification:
*
* (set|clear|inc|drop)_nlink
* inode_(inc|dec)_link_count
*/

/* Same storage under two names: a const view for readers, a writable one for the helpers above. */
union {
const unsigned int i_nlink;
unsigned int __i_nlink;
};
dev_t i_rdev; /* device number; imajor()/iminor() extract the major/minor parts */
loff_t i_size;
struct timespec i_atime;
struct timespec i_mtime;
struct timespec i_ctime;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
unsigned int i_blkbits;
blkcnt_t i_blocks;

#ifdef __NEED_I_SIZE_ORDERED
seqcount_t i_size_seqcount;
#endif

/* Misc */
unsigned long i_state;
struct mutex i_mutex;

unsigned long dirtied_when; /* jiffies of first dirtying */
unsigned long dirtied_time_when;

struct hlist_node i_hash;
struct list_head i_io_list; /* backing dev IO list */
#ifdef CONFIG_CGROUP_WRITEBACK
struct bdi_writeback *i_wb; /* the associated cgroup wb */

/* foreign inode detection, see wbc_detach_inode() */
int i_wb_frn_winner;
u16 i_wb_frn_avg_time;
u16 i_wb_frn_history;
#endif
struct list_head i_lru; /* inode LRU list */
struct list_head i_sb_list;
union {
struct hlist_head i_dentry;
struct rcu_head i_rcu;
};
u64 i_version;
atomic_t i_count;
atomic_t i_dio_count;
atomic_t i_writecount;
#ifdef CONFIG_IMA
atomic_t i_readcount; /* struct files open RO */
#endif
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
struct file_lock_context *i_flctx;
struct address_space i_data;
struct list_head i_devices;
/* Only one of these is meaningful for a given inode; they share storage. */
union {
struct pipe_inode_info *i_pipe;
struct block_device *i_bdev;
struct cdev *i_cdev;
char *i_link;
};

__u32 i_generation;

#ifdef CONFIG_FSNOTIFY
__u32 i_fsnotify_mask; /* all events this inode cares about */
struct hlist_head i_fsnotify_marks;
#endif

void *i_private; /* fs or device private pointer */
};
  • i_rdev 表示设备编号,由高12位主设备号和低20位次设备号组成,使用下面的函数获取主次设备号
    • 主设备号代表同一类设备,次设备号表示使用该设备的实例对象
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* Width, in bits, of the minor-number field at the low end of a device number. */
#define MINORBITS 20
/* Mask that keeps only the minor-number bits. */
#define MINORMASK ((1U << MINORBITS) - 1)

/* Extract the major number: everything above the minor field. */
#define MAJOR(devno) ((unsigned int) ((devno) >> MINORBITS))
/* Extract the minor number: the low MINORBITS bits. */
#define MINOR(devno) ((unsigned int) ((devno) & MINORMASK))
/* Pack a major/minor pair into a single device number. */
#define MKDEV(major_no,minor_no) (((major_no) << MINORBITS) | (minor_no))

/* Return the minor device number encoded in the inode's i_rdev field. */
static inline unsigned iminor(const struct inode *inode)
{
	unsigned minor_no = MINOR(inode->i_rdev);

	return minor_no;
}

/* Return the major device number encoded in the inode's i_rdev field. */
static inline unsigned imajor(const struct inode *inode)
{
	unsigned major_no = MAJOR(inode->i_rdev);

	return major_no;
}
  • 也可以在 /proc/devices 中得到注册设备的主设备号和设备名
cat /proc/devices
  • 可以在 /dev/ 下得到注册设备的主次设备号

udev 用户空间设备管理

Linux设计中强调的一个基本观点是机制和策略分离。
机制是做某样事情的固定步骤、方法,而策略是每一个步骤所采取的不同方式。
机制是固定的,而每个步骤采用的策略是不固定的。机制是稳定的,而策略是灵活的。
因此,在Linux内核中,不应该实现策略。

udev完全在用户态工作,利用设备加入或移出时内核所发送的热拔插事件(Hotplug Event)来工作。 在热拔插时,设备的详细信息会由内核通过netlink套接字发送出来,发出的事件叫uevent。 udev的设备命名策略、权限控制和事件处理都是在用户态下完成的,它利用从内核收到的信息来进行创建设备文件节点等工作。

udev的工作过程:

  1. 当内核检测到系统中出现了新设备后,内核会通过netlink套接字发送uevent
  2. udev获取内核发送的信息,进行规则的匹配。匹配的事物包括SUBSYSTEM,ACTION,attribute,内核提供的名称(通过KERNEL=)以及其他的环境变量
    • 关于udev的具体使用可以查看此处

sysfs

sysfs是内核设备模型的一个全局概览,此目录下的多个顶层文件是站在不同的角度来查看设备模型的:

  • bus 是以总线的视角来看待。
    • 首先,总线有很多种类型,所以在bus目录下会有多个代表不同总线类型的文件
    • 其次,每种总线相对应的就包含设备和驱动,所以就会有 devices,drivers 文件夹
      • 设备下的文件是 /sys/devices 中文件的符号链接
  • devices 是以设备的视角看待
    • 首先,设备是以层级的方式拓扑的,所以目录也是以此层级进行排列的
    • 其次,当设备与驱动匹配以后,对应设备目录就会有 driver 目录
  • class 是以设备种类的视角看待设备
    • 此目录下都是以种类区分各种设备
    • 设备下的文件是 /sys/devices 中文件的符号链接
  • block 是单独列出块设备文件
  • dev 是块设备和字符设备文件

在代码实现中,分别使用 bus_type,device_driver,device 来描述总线、驱动和设备:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
/**
* struct bus_type - The bus type of the device
*
* @name: The name of the bus.
* @dev_name: Used for subsystems to enumerate devices like ("foo%u", dev->id).
* @dev_root: Default device to use as the parent.
* @dev_attrs: Default attributes of the devices on the bus.
* @bus_groups: Default attributes of the bus.
* @dev_groups: Default attributes of the devices on the bus.
* @drv_groups: Default attributes of the device drivers on the bus.
* @match: Called, perhaps multiple times, whenever a new device or driver
* is added for this bus. It should return a nonzero value if the
* given device can be handled by the given driver.
* @uevent: Called when a device is added, removed, or a few other things
* that generate uevents to add the environment variables.
* @probe: Called when a new device or driver add to this bus, and callback
* the specific driver's probe to initial the matched device.
* @remove: Called when a device removed from this bus.
* @shutdown: Called at shut-down time to quiesce the device.
*
* @online: Called to put the device back online (after offlining it).
* @offline: Called to put the device offline for hot-removal. May fail.
*
* @suspend: Called when a device on this bus wants to go to sleep mode.
* @resume: Called to bring a device on this bus out of sleep mode.
* @pm: Power management operations of this bus, callback the specific
* device driver's pm-ops.
* @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU
* driver implementations to a bus and allow the driver to do
* bus-specific setup
* @p: The private data of the driver core, only the driver core can
* touch this.
* @lock_key: Lock class key for use by the lock validator
*
* A bus is a channel between the processor and one or more devices. For the
* purposes of the device model, all devices are connected via a bus, even if
* it is an internal, virtual, "platform" bus. Buses can plug into each other.
* A USB controller is usually a PCI device, for example. The device model
* represents the actual connections between buses and the devices they control.
* A bus is represented by the bus_type structure. It contains the name, the
* default attributes, the bus' methods, PM operations, and the driver core's
* private data.
*/

struct bus_type {
const char *name; /* name of the bus */
const char *dev_name; /* used by subsystems to enumerate devices, e.g. ("foo%u", dev->id) */
struct device *dev_root; /* default device to use as the parent */
struct device_attribute *dev_attrs; /* use dev_groups instead */
const struct attribute_group **bus_groups; /* default attributes of the bus */
const struct attribute_group **dev_groups; /* default attributes of the devices on the bus */
const struct attribute_group **drv_groups; /* default attributes of the device drivers on the bus */

/* returns nonzero if the given driver can handle the given device */
int (*match)(struct device *dev, struct device_driver *drv);
/* adds environment variables when a uevent is generated for the device */
int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
/* called when a device or driver is added; calls the matching driver's probe */
int (*probe)(struct device *dev);
/* called when a device is removed from this bus */
int (*remove)(struct device *dev);
/* called at shut-down time to quiesce the device */
void (*shutdown)(struct device *dev);

/* put the device back online after offlining it */
int (*online)(struct device *dev);
/* put the device offline for hot-removal; may fail */
int (*offline)(struct device *dev);

/* a device on this bus wants to enter sleep mode */
int (*suspend)(struct device *dev, pm_message_t state);
/* bring a device on this bus out of sleep mode */
int (*resume)(struct device *dev);

const struct dev_pm_ops *pm; /* bus power-management operations */

const struct iommu_ops *iommu_ops; /* IOMMU-specific operations for this bus */

struct subsys_private *p; /* driver-core private data; only the driver core may touch it */
struct lock_class_key lock_key; /* lock class key for the lock validator */
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/**
* struct device_driver - The basic device driver structure
* @name: Name of the device driver.
* @bus: The bus which the device of this driver belongs to.
* @owner: The module owner.
* @mod_name: Used for built-in modules.
* @suppress_bind_attrs: Disables bind/unbind via sysfs.
* @probe_type: Type of the probe (synchronous or asynchronous) to use.
* @of_match_table: The open firmware table.
* @acpi_match_table: The ACPI match table.
* @probe: Called to query the existence of a specific device,
* whether this driver can work with it, and bind the driver
* to a specific device.
* @remove: Called when the device is removed from the system to
* unbind a device from this driver.
* @shutdown: Called at shut-down time to quiesce the device.
* @suspend: Called to put the device to sleep mode. Usually to a
* low power state.
* @resume: Called to bring a device from sleep mode.
* @groups: Default attributes that get created by the driver core
* automatically.
* @pm: Power management operations of the device which matched
* this driver.
* @p: Driver core's private data, no one other than the driver
* core can touch this.
*
* The device driver-model tracks all of the drivers known to the system.
* The main reason for this tracking is to enable the driver core to match
* up drivers with new devices. Once drivers are known objects within the
* system, however, a number of other things become possible. Device drivers
* can export information and configuration variables that are independent
* of any specific device.
*/

struct device_driver {
const char *name; /* name of the device driver */
struct bus_type *bus; /* bus that this driver's devices belong to */

struct module *owner; /* the module owner */
const char *mod_name; /* used for built-in modules */

bool suppress_bind_attrs; /* disables bind/unbind via sysfs */
enum probe_type probe_type; /* synchronous or asynchronous probing */

const struct of_device_id *of_match_table; /* the open firmware table */
const struct acpi_device_id *acpi_match_table; /* the ACPI match table */

/* query whether a specific device exists and bind this driver to it */
int (*probe) (struct device *dev);
/* unbind a device from this driver when it is removed from the system */
int (*remove) (struct device *dev);
/* called at shut-down time to quiesce the device */
void (*shutdown) (struct device *dev);
/* put the device into sleep mode, usually a low power state */
int (*suspend) (struct device *dev, pm_message_t state);
/* bring the device back from sleep mode */
int (*resume) (struct device *dev);
const struct attribute_group **groups; /* default attributes created automatically by the driver core */

const struct dev_pm_ops *pm; /* power-management operations of the matched device */

struct driver_private *p; /* driver-core private data; nobody else may touch it */
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/**
* struct device - The basic device structure
* @parent: The device's "parent" device, the device to which it is attached.
* In most cases, a parent device is some sort of bus or host
* controller. If parent is NULL, the device, is a top-level device,
* which is not usually what you want.
* @p: Holds the private data of the driver core portions of the device.
* See the comment of the struct device_private for detail.
* @kobj: A top-level, abstract class from which other classes are derived.
* @init_name: Initial name of the device.
* @type: The type of device.
* This identifies the device type and carries type-specific
* information.
* @mutex: Mutex to synchronize calls to its driver.
* @bus: Type of bus device is on.
* @driver: Which driver has allocated this
* @platform_data: Platform data specific to the device.
* Example: For devices on custom boards, as typical of embedded
* and SOC based hardware, Linux often uses platform_data to point
* to board-specific structures describing devices and how they
* are wired. That can include what ports are available, chip
* variants, which GPIO pins act in what additional roles, and so
* on. This shrinks the "Board Support Packages" (BSPs) and
* minimizes board-specific #ifdefs in drivers.
* @driver_data: Private pointer for driver specific info.
* @power: For device power management.
* See Documentation/power/devices.txt for details.
* @pm_domain: Provide callbacks that are executed during system suspend,
* hibernation, system resume and during runtime PM transitions
* along with subsystem-level and driver-level callbacks.
* @pins: For device pin management.
* See Documentation/pinctrl.txt for details.
* @msi_list: Hosts MSI descriptors
* @msi_domain: The generic MSI domain this device is using.
* @numa_node: NUMA node this device is close to.
* @dma_mask: Dma mask (if dma'ble device).
* @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all
* hardware supports 64-bit addresses for consistent allocations
* such descriptors.
* @dma_pfn_offset: offset of DMA memory range relatively of RAM
* @dma_parms: A low level driver may set these to teach IOMMU code about
* segment limitations.
* @dma_pools: Dma pools (if dma'ble device).
* @dma_mem: Internal for coherent mem override.
* @cma_area: Contiguous memory area for dma allocations
* @archdata: For arch-specific additions.
* @of_node: Associated device tree node.
* @fwnode: Associated device node supplied by platform firmware.
* @devt: For creating the sysfs "dev".
* @id: device instance
* @devres_lock: Spinlock to protect the resource of the device.
* @devres_head: The resources list of the device.
* @knode_class: The node used to add the device to the class list.
* @class: The class of the device.
* @groups: Optional attribute groups.
* @release: Callback to free the device after all references have
* gone away. This should be set by the allocator of the
* device (i.e. the bus driver that discovered the device).
* @iommu_group: IOMMU group the device belongs to.
*
* @offline_disabled: If set, the device is permanently online.
* @offline: Set after successful invocation of bus type's .offline().
*
* At the lowest level, every device in a Linux system is represented by an
* instance of struct device. The device structure contains the information
* that the device model core needs to model the system. Most subsystems,
* however, track additional information about the devices they host. As a
* result, it is rare for devices to be represented by bare device structures;
* instead, that structure, like kobject structures, is usually embedded within
* a higher-level representation of the device.
*/

struct device {
struct device *parent; /* device this one is attached to; NULL means a top-level device */

struct device_private *p; /* private data of the driver-core portions of the device */

struct kobject kobj; /* top-level, abstract class from which other classes are derived */
const char *init_name; /* initial name of the device */
const struct device_type *type; /* identifies the device type and carries type-specific information */

struct mutex mutex; /* mutex to synchronize calls to
* its driver.
*/


struct bus_type *bus; /* type of bus device is on */
struct device_driver *driver; /* which driver has allocated this
device */

void *platform_data; /* Platform specific data, device
core doesn't touch it */

void *driver_data; /* Driver data, set and get with
dev_set/get_drvdata */

struct dev_pm_info power; /* for device power management */
struct dev_pm_domain *pm_domain; /* callbacks run during suspend, hibernation, resume and runtime PM transitions */

#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
struct irq_domain *msi_domain; /* the generic MSI domain this device is using */
#endif
#ifdef CONFIG_PINCTRL
struct dev_pin_info *pins; /* for device pin management */
#endif
#ifdef CONFIG_GENERIC_MSI_IRQ
struct list_head msi_list; /* hosts MSI descriptors */
#endif

#ifdef CONFIG_NUMA
int numa_node; /* NUMA node this device is close to */
#endif
u64 *dma_mask; /* dma mask (if dma'able device) */
u64 coherent_dma_mask;/* Like dma_mask, but for
alloc_coherent mappings as
not all hardware supports
64 bit addresses for consistent
allocations such descriptors. */

unsigned long dma_pfn_offset; /* offset of the DMA memory range relative to RAM */

struct device_dma_parameters *dma_parms; /* set by low-level drivers to describe segment limitations to IOMMU code */

struct list_head dma_pools; /* dma pools (if dma'ble) */

struct dma_coherent_mem *dma_mem; /* internal for coherent mem
override */

#ifdef CONFIG_DMA_CMA
struct cma *cma_area; /* contiguous memory area for dma
allocations */

#endif
/* arch specific additions */
struct dev_archdata archdata;

struct device_node *of_node; /* associated device tree node */
struct fwnode_handle *fwnode; /* firmware device node */

dev_t devt; /* dev_t, creates the sysfs "dev" */
u32 id; /* device instance */

spinlock_t devres_lock; /* protects the resource list of the device */
struct list_head devres_head; /* the resource list of the device */

struct klist_node knode_class; /* node used to add the device to the class list */
struct class *class; /* the class of the device */
const struct attribute_group **groups; /* optional groups */

/* frees the device once all references are gone; set by whoever allocated the device */
void (*release)(struct device *dev);
struct iommu_group *iommu_group; /* IOMMU group the device belongs to */

bool offline_disabled:1; /* if set, the device is permanently online */
bool offline:1; /* set after a successful call to the bus type's .offline() */
};

每当设备或驱动注册进内核时,bus都会进行匹配检查,只要符合匹配标准便连接二者。

总线、设备和驱动都会映射在 sysfs 中,其中的目录来源于 bus_type,device_driver,device , 而目录中的文件来源于 attribute .

Last Updated 2020-06-28 Sun 12:00.
Render by hexo-renderer-org with Emacs 26.3 (Org mode 9.3.7)