之前的文章“哪里来的的 memset”【参考1】提到过因为编译器擅作主张使用memset优化引起了很诡异的问题。可以通过关闭编译优化来避免错误,这里从代码的角度分析 EDK2 是如何实现 memset 功能的。
1. \MdePkg\Library\BaseMemoryLib\MemLibGeneric.c 提供了三个函数:
InternalMemSetMem16
InternalMemSetMem32
InternalMemSetMem64
以 InternalMemSetMem16 为例:
/**
  Stores a 16-bit value into every element of a buffer of 16-bit elements.

  The elements are written one at a time, starting with the last element
  (index Length - 1) and finishing with the first element (index 0).

  @param  Buffer  The pointer to the target buffer to fill.
  @param  Length  The number of 16-bit elements to write.
  @param  Value   The 16-bit value stored into each element.

  @return Buffer

**/
VOID *
EFIAPI
InternalMemSetMem16 (
  OUT VOID    *Buffer,
  IN  UINTN   Length,
  IN  UINT16  Value
  )
{
  UINT16  *Words;

  Words = (UINT16*)Buffer;

  //
  // Walk the buffer from its highest index down to index 0.
  //
  while (Length != 0) {
    Length--;
    Words[Length] = Value;
  }

  return Buffer;
}
看起来for (; Length != 0; Length--) 这样的定义足够“迷惑”编译器避免优化。
2. \MdePkg\Library\BaseMemoryLib\SetMem.c 提供了InternalMemSetMem()
/**
  Set Buffer to Value for Length bytes.

  If Buffer is 8-byte aligned and Length is at least 8, the buffer is filled
  with 64-bit stores; otherwise, if Buffer is 4-byte aligned and Length is at
  least 4, it is filled with 32-bit stores. Whatever is left over (or the whole
  buffer when neither condition holds) is filled one byte at a time.

  @param  Buffer  The memory to set.
  @param  Length  The number of bytes to set.
  @param  Value   The value of the set operation.

  @return Buffer

**/
VOID *
EFIAPI
InternalMemSetMem (
  OUT VOID   *Buffer,
  IN  UINTN  Length,
  IN  UINT8  Value
  )
{
  //
  // Declare the local variables that actually move the data elements as
  // volatile to prevent the optimizer from replacing this function with
  // the intrinsic memset()
  //
  volatile UINT8   *Pointer8;
  volatile UINT32  *Pointer32;
  volatile UINT64  *Pointer64;
  UINT32           Value32;
  UINT64           Value64;

  if ((((UINTN)Buffer & 0x7) == 0) && (Length >= 8)) {
    //
    // Generate the 64bit value.
    // Value is widened to UINT32 before shifting: a UINT8 operand would be
    // promoted to a signed int, and shifting a value >= 0x80 left by 24 bits
    // would then overflow that signed int.
    //
    Value32 = ((UINT32)Value << 24) | ((UINT32)Value << 16) | ((UINT32)Value << 8) | (UINT32)Value;
    Value64 = LShiftU64 (Value32, 32) | Value32;

    Pointer64 = (UINT64*)Buffer;
    while (Length >= 8) {
      *(Pointer64++) = Value64;
      Length -= 8;
    }

    // Finish with bytes if needed
    Pointer8 = (UINT8*)Pointer64;
  } else if ((((UINTN)Buffer & 0x3) == 0) && (Length >= 4)) {
    //
    // Generate the 32bit value (widened first, see above).
    //
    Value32 = ((UINT32)Value << 24) | ((UINT32)Value << 16) | ((UINT32)Value << 8) | (UINT32)Value;

    Pointer32 = (UINT32*)Buffer;
    while (Length >= 4) {
      *(Pointer32++) = Value32;
      Length -= 4;
    }

    // Finish with bytes if needed
    Pointer8 = (UINT8*)Pointer32;
  } else {
    //
    // Buffer is not suitably aligned or is too short: fill it bytewise.
    //
    Pointer8 = (UINT8*)Buffer;
  }

  //
  // Fill the remaining 0..7 (or 0..3) tail bytes, or the whole buffer on the
  // bytewise path.
  //
  while (Length-- > 0) {
    *(Pointer8++) = Value;
  }

  return Buffer;
}
避免被编译器优化的方法和上面的类似,此外还可以看出这个函数特地用 8 bytes填充提升效率。
3. \MdePkg\Library\UefiMemoryLib\MemLib.c 中的InternalMemSetMem 函数直接调用 gBS 提供的服务
/**
  Sets every byte of a buffer to a given value by calling gBS->SetMem().

  @param  Buffer  Memory to set.
  @param  Size    The number of bytes to set.
  @param  Value   Value of the set operation.

  @return Buffer.

**/
VOID *
EFIAPI
InternalMemSetMem (
  OUT VOID   *Buffer,
  IN  UINTN  Size,
  IN  UINT8  Value
  )
{
  //
  // The fill itself is done by the SetMem() service reached through gBS;
  // this function only forwards the arguments and hands Buffer back.
  //
  gBS->SetMem (Buffer, Size, Value);

  return Buffer;
}
4. 通过 volatile 声明变量避免编译器的优化,简单粗暴,和前面 2 提到的没有本质差别。volatile 是一个类型修饰符(type qualifier)。volatile 的作用是作为指令关键字,确保本条指令不会因编译器的优化而省略,且要求每次直接读值。volatile 的变量是说这变量可能会被意想不到地改变,这样,编译器就不会去假设这个变量的值了。【参考2】
\EdkCompatibilityPkg\Foundation\Library\EdkIIGlueLib\Library\BaseMemoryLib\Ebc\SetMem.c
/**
  Writes Value into each of the first Size bytes of Buffer, one byte at a
  time, from the lowest address upwards.

  @param  Buffer  Memory to set.
  @param  Size    Number of bytes to set.
  @param  Value   Value of the set operation.

  @return Buffer

**/
VOID *
EFIAPI
InternalMemSetMem (
  IN VOID   *Buffer,
  IN UINTN  Size,
  IN UINT8  Value
  )
{
  //
  // The destination is accessed through a pointer to volatile so that the
  // optimizer does not replace this loop with the intrinsic memset().
  //
  volatile UINT8  *Cursor;
  UINTN           Remaining;

  Cursor = (UINT8*)Buffer;

  for (Remaining = Size; Remaining != 0; Remaining--) {
    *Cursor = Value;
    Cursor++;
  }

  return Buffer;
}
5.汇编语言实现
\EdkCompatibilityPkg\Foundation\Library\CompilerStub\X64\memset.asm
\EdkCompatibilityPkg\Foundation\Library\CompilerStub\Ia32\memset.asm
IA32汇编的实现
.686
.model flat,C
.mmx
.code
;------------------------------------------------------------------------------
; VOID *
; memset (
; OUT VOID *Buffer,
; IN UINT8 Value,
; IN UINTN Count
; )
;
; Fills Count bytes starting at Buffer with Value and returns Buffer in eax.
;
; Count / 8 quadwords are stored through mm0 first, then the remaining
; Count & 7 bytes are stored with rep stosb. mm0 and mm1 are saved on the
; stack before use and restored afterwards.
;
; "USES edi" makes the assembler save and restore edi around the body; with
; edi pushed, the arguments sit at [esp + 8] (Buffer), [esp + 12] (Value) and
; [esp + 16] (Count).
;
; NOTE(review): no emms is executed after the MMX section - confirm that
; callers do not depend on the x87 state afterwards.
; NOTE(review): rep stosb relies on the direction flag being clear; it is not
; cleared here - presumably guaranteed by the calling convention, confirm.
;------------------------------------------------------------------------------
memset PROC USES edi
; Build a dword holding four copies of Value in eax.
mov al, [esp + 12]
; ax <- Value:Value
mov ah, al
; upper 16 bits of edx <- ax
shrd edx, eax, 16
; eax <- ax:ax, i.e. Value in all four bytes
shld eax, edx, 16
mov ecx, [esp + 16] ; ecx <- Count
cmp ecx, 0 ; if Count == 0, do nothing
je @SetDone
mov edi, [esp + 8] ; edi <- Buffer
; edx <- Count & 7, the number of tail bytes left after the quadword loop
mov edx, ecx
and edx, 7
shr ecx, 3 ; # of Qwords to set
; fewer than 8 bytes in total: skip the MMX section entirely
jz @SetBytes
; reserve 16 bytes of stack to preserve mm0 and mm1
add esp, -10h
movq [esp], mm0 ; save mm0
movq [esp + 8], mm1 ; save mm1
; mm0 <- (eax << 32) | eax
movd mm0, eax
movd mm1, eax
psllq mm0, 32
por mm0, mm1 ; fill mm0 with 8 Value's
; quadword loop: one 8-byte store per iteration, ecx iterations
@@:
movq [edi], mm0
add edi, 8
loop @B
movq mm0, [esp] ; restore mm0
movq mm1, [esp + 8] ; restore mm1
add esp, 10h ; stack cleanup
@SetBytes:
; store the remaining edx (0..7) bytes from al; edi already points past the
; quadwords written above
mov ecx, edx
rep stosb
@SetDone:
mov eax, [esp + 8] ; eax <- Buffer as return value
ret
memset ENDP
END
上面就是实现 SetMem 函数的基本方法,如果在 Porting 代码到 UEFI时遇到 MemSet 的错误,不妨试试直接将上面的代码搬迁到程序中。
参考:
1. http://www.lab-z.com/stu136/ Step to UEFI (136)哪里来的的 memset
2. https://baike.baidu.com/item/volatile/10606957?fr=aladdin volatile