前段时间实现的C协程依赖栈传递参数,在开启优化时会导致错误,于是实现了一个ucontext的版本,但ucontext的切换效率太差了,
在我的机器上执行4000W次切换需要11秒左右,这达不到我的要求,所以重新设计了实现,使得在开启优化时也能得到正确的结果.
并且效率也令人满意,4000W次切换仅需要730ms左右,足足比ucontext的实现快了近15倍。
下面贴出实现:
/*
 * uthread.c - minimal stackful coroutines for 32-bit x86, GCC inline assembly.
 *
 * A coroutine is described by a struct uthread that holds a saved register
 * set, the value handed over on the last switch, and the caller-supplied
 * stack. uthread_switch() stores the current registers into `from` and loads
 * the ones stored in `to`.
 *
 * NOTE(review): the angle-bracket header names were lost from the original
 * text; the three system headers below were reconstructed from what the code
 * uses (calloc/free/exit, int32_t/uintptr_t, NULL) - confirm against the
 * real file.
 * NOTE(review): the code stores pointers in int32_t and names i386 registers
 * directly, so it can only work when built as 32-bit x86 code.
 */
#include "uthread.h"
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
#include "link_list.h"

struct uthread
{
    int32_t reg[8];            /* 0:esp,1:ebp,2:eax,3:ebx,4:ecx,5:edx,6:edi,7:esi */
    void *para;                /* value handed over by the most recent switch into this coroutine */
    uthread_t parent;          /* switched to when main_fun returns; NULL means exit(0) */
    void *(*main_fun)(void *); /* coroutine entry point */
    void *stack;               /* caller-owned stack memory; never freed here */
    int32_t ssize;             /* size of `stack` in bytes */
    int8_t first_run;          /* 1 until the coroutine has been entered once */
};

#ifdef _DEBUG
/* for debug version */
/*
 * Entry trampoline, executed on the new coroutine's stack.
 * The coroutine handle is not a C argument: uthread_switch() leaves it in
 * eax right before the call and it is read back from there.
 */
void uthread_main_function()
{
    int32_t arg;
    /* The asm WRITES arg, so it has to be an output operand ("=m"). */
    __asm__ volatile(
        "movl %%eax,%0\t\n"
        : "=m"(arg)
    );
    uthread_t u = (uthread_t)arg;
    void *ret = u->main_fun(u->para);
    if (u->parent)
        uthread_switch(u, u->parent, ret);
    else
        exit(0);
}
#else
/* for release version */
/*
 * Entry trampoline, executed on the new coroutine's stack.
 * regparm(1) makes the single argument travel in a register, so the call
 * does not depend on the contents of the freshly installed stack.
 */
void __attribute__((regparm(1))) uthread_main_function(void *arg)
{
    uthread_t u = (uthread_t)arg;
    void *ret = u->main_fun(u->para);
    if (u->parent)
        uthread_switch(u, u->parent, ret);
    else
        exit(0);
}
#endif

/*
 * Allocate a coroutine descriptor.
 *
 * parent     - coroutine resumed when `fun` returns (may be NULL).
 * stack      - caller-owned stack memory; NULL for a descriptor that only
 *              serves as a save area for an already running context.
 * stack_size - size of `stack` in bytes.
 * fun        - entry point; NULL for a descriptor that is never started.
 *
 * Returns the new descriptor, or NULL when the allocation fails.
 */
uthread_t uthread_create(uthread_t parent, void *stack, uint32_t stack_size, void *(*fun)(void *))
{
    uthread_t u = calloc(1, sizeof(*u));
    if (!u)
        return NULL;
    u->parent = parent;
    u->main_fun = fun;
    u->stack = stack;
    u->ssize = (int32_t)stack_size;
    if (stack)
    {
        /* Initial esp and ebp both point 4 bytes below the end of the stack. */
        int32_t top = (int32_t)((uintptr_t)stack + stack_size - 4);
        u->reg[0] = top;
        u->reg[1] = top;
    }
    if (u->main_fun)
        u->first_run = 1;
    return u;
}

/*
 * Free a descriptor created by uthread_create() and reset the caller's
 * handle to NULL. The stack memory is not touched. A NULL `u` is ignored.
 */
void uthread_destroy(uthread_t *u)
{
    if (!u)
        return;
    free(*u);
    *u = NULL;
}

#ifdef _DEBUG
/*
 * Save the running context into `from` and continue in `to`.
 *
 * para is stored in to->para before the switch. The value returned is
 * from->para as found when execution continues in this function, i.e.
 * whatever was stored there by a later switch into `from`.
 * Returns NULL without switching when `from` or `to` is NULL.
 *
 * NOTE(review): the asm statements change esp/ebp and the general registers
 * behind the compiler's back; the instruction order below is kept exactly as
 * in the original and must not be rearranged.
 */
void *__attribute__((regparm(3))) uthread_switch(uthread_t from, uthread_t to, void *para)
{
    if (!from || !to)
        return NULL;
    to->para = para;
    int32_t esp, ebp, eax, ebx, ecx, edx, edi, esi;
    /* save current registers */
    /* the order is important */
    /* The locals are written by the asm, hence output operands. */
    __asm__ volatile(
        "movl %%eax,%2\t\n"
        "movl %%ebx,%3\t\n"
        "movl %%ecx,%4\t\n"
        "movl %%edx,%5\t\n"
        "movl %%edi,%6\t\n"
        "movl %%esi,%7\t\n"
        "movl %%ebp,%1\t\n"
        "movl %%esp,%0\t\n"
        : "=m"(esp), "=m"(ebp), "=m"(eax), "=m"(ebx), "=m"(ecx), "=m"(edx), "=m"(edi), "=m"(esi)
    );
    from->reg[0] = esp;
    from->reg[1] = ebp;
    from->reg[2] = eax;
    from->reg[3] = ebx;
    from->reg[4] = ecx;
    from->reg[5] = edx;
    from->reg[6] = edi;
    from->reg[7] = esi;
    if (to->first_run)
    {
        to->first_run = 0;
        esp = to->reg[0];
        /* use eax to pass arg */
        eax = (int32_t)to;
        /* "memory": pending stores must be complete before the stack changes. */
        __asm__ volatile(
            "movl %1,%%eax\t\n"
            "movl %0,%%ebp\t\n"
            "movl %%ebp,%%esp\t\n"
            :
            : "m"(esp), "m"(eax)
            : "memory"
        );
        uthread_main_function();
    }
    else
    {
        esp = to->reg[0];
        ebp = to->reg[1];
        eax = to->reg[2];
        ebx = to->reg[3];
        ecx = to->reg[4];
        edx = to->reg[5];
        edi = to->reg[6];
        esi = to->reg[7];
        /* the order is important */
        /* "memory": other coroutines ran in between, so nothing may be cached across this point. */
        __asm__ volatile(
            "movl %2,%%eax\t\n"
            "movl %3,%%ebx\t\n"
            "movl %4,%%ecx\t\n"
            "movl %5,%%edx\t\n"
            "movl %6,%%edi\t\n"
            "movl %7,%%esi\t\n"
            "movl %1,%%ebp\t\n"
            "movl %0,%%esp\t\n"
            :
            : "m"(esp), "m"(ebp), "m"(eax), "m"(ebx), "m"(ecx), "m"(edx), "m"(edi), "m"(esi)
            : "memory"
        );
    }
    return from->para;
}
#else
/*
 * Save the running context into `from` and continue in `to`.
 *
 * para is stored in to->para before the switch. The value returned is
 * from->para as found when execution continues in this function, i.e.
 * whatever was stored there by a later switch into `from`.
 * Returns NULL without switching when `from` or `to` is NULL.
 *
 * NOTE(review): the asm statements change esp/ebp and the general registers
 * behind the compiler's back; the instruction order below is kept exactly as
 * in the original and must not be rearranged.
 */
void *__attribute__((regparm(3))) uthread_switch(uthread_t from, uthread_t to, void *para)
{
    if (!from || !to)
        return NULL;
    to->para = para;
    int32_t esp, ebp, edi, esi;
    /* save current registers */
    /* the order is important */
    /* from->reg[] is written by the asm, hence output operands. */
    __asm__ volatile(
        "movl %%eax,%2\t\n"
        "movl %%ebx,%3\t\n"
        "movl %%ecx,%4\t\n"
        "movl %%edx,%5\t\n"
        "movl %%edi,%6\t\n"
        "movl %%esi,%7\t\n"
        "movl %%ebp,%1\t\n"
        "movl %%esp,%0\t\n"
        : "=m"(from->reg[0]), "=m"(from->reg[1]), "=m"(from->reg[2]), "=m"(from->reg[3]),
          "=m"(from->reg[4]), "=m"(from->reg[5]), "=m"(from->reg[6]), "=m"(from->reg[7])
    );
    if (to->first_run)
    {
        to->first_run = 0;
        /* change stack */
        /* the order is important */
        /* "memory": pending stores must be complete before the stack changes. */
        __asm__ volatile(
            "movl %0,%%ebp\t\n"
            "movl %%ebp,%%esp\t\n"
            :
            : "m"(to->reg[0])
            : "memory"
        );
        uthread_main_function((void *)to);
    }
    else
    {
        /* esp, ebp, edi and esi are staged in locals first, as in the original. */
        esp = to->reg[0];
        ebp = to->reg[1];
        edi = to->reg[6];
        esi = to->reg[7];
        /* the order is important */
        /* "memory": other coroutines ran in between, so nothing may be cached across this point. */
        __asm__ volatile(
            "movl %2,%%eax\t\n"
            "movl %3,%%ebx\t\n"
            "movl %4,%%ecx\t\n"
            "movl %5,%%edx\t\n"
            "movl %6,%%edi\t\n"
            "movl %7,%%esi\t\n"
            "movl %1,%%ebp\t\n"
            "movl %0,%%esp\t\n"
            :
            : "m"(esp), "m"(ebp), "m"(to->reg[2]), "m"(to->reg[3]),
              "m"(to->reg[4]), "m"(to->reg[5]), "m"(edi), "m"(esi)
            : "memory"
        );
    }
    return from->para;
}
#endif
test.c
#include#include "uthread.h"#include "SysTime.h"#include void* ufun2(void *arg){ printf("ufun2\n"); char **tmp = (char**)arg; uthread_t self = (uthread_t)tmp[0]; uthread_t parent = (uthread_t)tmp[1]; volatile void *ptr = self; while(ptr) { ptr = uthread_switch(self,parent,NULL); } return NULL;}char *stack1;char *stack2;void* ufun1(void *arg){ uthread_t self = (uthread_t)arg; uthread_t u = uthread_create(self,stack2,4096,ufun2); char* _arg[2]; _arg[0] = (char*)u; _arg[1] = (char*)self; int i = 0; uint32_t tick = GetSystemMs(); for( ; i < 20000000; ++i) { uthread_switch(self,u,&_arg[0]); } printf("%d\n",GetSystemMs()-tick); uthread_switch(self,u,NULL); return arg;}int main(){ stack1 = (char*)malloc(4096); stack2 = (char*)malloc(4096); /* * if use ucontext version char dummy_stack[4096]; uthread_t p = uthread_create(NULL,dummy_stack,0,NULL); */ uthread_t p = uthread_create(NULL,NULL,0,NULL); uthread_t u = uthread_create(p,stack1,4096,ufun1); uthread_switch(p,u,u); printf("main end\n"); return 0;};