System Calls
- Linux 系统调用实现
- VDSO and 虚拟系统调用
- 通过系统调用访问用户空间
+-------------+ +-------------+
| 应用程序 | | 应用程序 |
+-------------+ +-------------+
| |
|read(fd, buff, len) |fork()
| |
v v
+---------------------------------------+
| 内核 |
+---------------------------------------+
+-------------+ dup2 +-----------------------------+
| 应用程序 |-----+ | libc |
+-------------+ | | |
+---->| C7590 dup2: |
| ... |
| C7592 movl 0x8(%esp),%ecx |
| C7596 movl 0x4(%esp),%ebx |
| C759a movl $0x3f,%eax |
+------------------------------+ C759f int $0x80 |
| | ... +<-----+
| +-----------------------------+ |
| |
| |
| |
| |
| +------------------------------------------------------------+ |
| | 内核 | |
| | | |
+--->|ENTRY(entry_INT80_32) | |
| ASM_CLAC | |
| pushl %eax # pt_regs->orig_ax | |
| SAVE_ALL pt_regs_ax=$-ENOSYS # save rest | |
| ... | |
| movl %esp, %eax | |
| call do_int80_syscall_32 | |
| .... | |
| RESTORE_REGS 4 # skip orig_eax/error_code | |
| ... | |
| INTERRUPT_RETURN +-+
+------------------------------------------------------------+
/* 处理(handle)int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs)
{
enter_from_user_mode();
local_irq_enable();
do_syscall_32_irqs_on(regs);
}
/* Linux x86 32 位系统调用分发器的简化版本 */
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
{
unsigned int nr = regs->orig_ax;
if (nr < IA32_NR_syscalls)
regs->ax = ia32_sys_call_table[nr](regs->bx, regs->cx,
regs->dx, regs->si,
regs->di, regs->bp);
syscall_return_slowpath(regs);
}
#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
const sys_call_ptr_t ia32_sys_call_table[] = {
[0 ... __NR_syscall_compat_max] = &sys_ni_syscall,
#include <asm/syscalls_32.h>
};
__SYSCALL_I386(0, sys_restart_syscall)
__SYSCALL_I386(1, sys_exit)
__SYSCALL_I386(2, sys_fork)
__SYSCALL_I386(3, sys_read)
__SYSCALL_I386(4, sys_write)
#ifdef CONFIG_X86_32
__SYSCALL_I386(5, sys_open)
#else
__SYSCALL_I386(5, compat_sys_open)
#endif
__SYSCALL_I386(6, sys_close)
- 写时复制(Copy on Write)、需求分页(demand paging)、交换(swapping):故障地址和引发故障的地址都在用户空间;故障地址有效(在用户地址空间进行检查)。
- 在系统调用中使用无效指针:引发故障的地址在内核空间;故障地址在用户空间且无效。
- 内核错误(内核访问无效指针):与上述情况相同。
成本 | 指针检查 | 故障处理 |
---|---|---|
有效地址 | 地址空间搜索 | 可忽略的 |
无效地址 | 地址空间搜索 | 异常表搜索 |
/* 如果 user_ptr 无效,则返回 -EFAULT */
if (copy_from_user(&kernel_buffer, user_ptr, size))
return -EFAULT;
/* 只有当 user_ptr 有效时才能工作,否则会导致内核崩溃 */
memcpy(&kernel_buffer, user_ptr, size);
#define get_user(x, ptr) \
({ \
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
__chk_user_ptr(ptr); \
might_fault(); \
asm volatile("call __get_user_%P4" \
: "=a" (__ret_gu), "=r" (__val_gu), \
ASM_CALL_CONSTRAINT \
: "0" (ptr), "i" (sizeof(*(ptr)))); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
})
#define get_user(x, ptr) \
movl ptr, %eax \
call __get_user_1 \
movl %edx, x \
movl %eax, result \
.text
ENTRY(__get_user_1)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
ASM_STAC
1: movzbl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
ret
ENDPROC(__get_user_1)
bad_get_user:
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
ASM_CLAC
ret
END(bad_get_user)
_ASM_EXTABLE(1b,bad_get_user)
/* 异常表条目 */
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
.pushsection "__ex_table","a" ; \
.balign 4 ; \
.long (from) - . ; \
.long (to) - . ; \
.long (handler) - . ; \
.popsection
# define _ASM_EXTABLE(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
#define EXCEPTION_TABLE(align) \
. = ALIGN(align); \
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ex_table) = .; \
KEEP(*(__ex_table)) \
VMLINUX_SYMBOL(__stop___ex_table) = .; \
}
bool ex_handler_default(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
regs->ip = ex_fixup_addr(fixup);
return true;
}
int fixup_exception(struct pt_regs *regs, int trapnr)
{
const struct exception_table_entry *e;
ex_handler_t handler;
e = search_exception_tables(regs->ip);
if (!e)
return 0;
handler = ex_fixup_handler(e);
return handler(e, regs, trapnr);
}