在 http://stackoverflow.com/questions/22342854/what-is-the-optimal-algorithm-for-the-game-2048/22389702#22389702 上有很多大牛提出了自己的解决方法。
ovolve 使用了 a-b剪枝,这是典型的用在博弈上的算法,比如:之前的中国象棋就是用了这个方法。
Nicola Pezzotti 的特点在于贡献了一个很简易的计算局面整洁度的算法.....
问题是上面的都是 js 语言编写的,对于我来说很难理解,并且很难进行评估。
个人感觉算法本身不难,困难的是如何理解他们使用的数据结构,更具体来说困难度在于如何进行局面评估的算法(这个问题中,需要对盘面顺序程度和整体分数进行评估)。
最终,目光落在 nneonneo 的算法上。他使用了 bit-map 的技术,能够充分利用空间:每一个格子从 0-15 ,对应着 2^0 - 2^15 ,整个局面用64位即可表示。这样做大大
降低了传递参数之类的耗时,客观上节省了时间。此外,还使用数组直接存放了行列的分数,评估行列分数直接让数组值相加即可,这是非常经典的空间换时间的做法。
他的程序可以在他的 git上下载到 https://github.com/nneonneo/2048-ai ,我这里也存了一份可以在 vs2008下编译通过的
我的程序完全仿照了他的程序,只是没有实现他用来剪枝的泛型 typedef std::map
在我的 I5 2450 上,一步大约需要2s左右。
program NeoNeo; {$APPTYPE CONSOLE} uses SysUtils,math,windows; const ROW_MASK=$FFFF; COL_MASK=$000F000F000F000F; CPROB_THRESH_BASE = 0.0001; CACHE_DEPTH_LIMIT=1; SEARCH_DEPTH_LIMIT=3; b:array[0..13] of integer=(0,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192); type board_t=int64; row_t=word; get_move_func_t = function(board:board_t):integer; T_eval_state=record cprob_thresh:single; maxdepth:integer; curdepth:integer; cachehits:integer; moves_evaled:integer; end; function score_tilechoose_node(var state:T_eval_state; board:board_t; cprob:single):single; forward; function reverse_row(row:row_t):row_t; forward; function pack_col(col:board_t):row_t; forward; function unpack_col(row:row_t):board_t; forward; function score_move_node(Var state:t_eval_state;board:board_t; cprob:single):single; forward; var row_left_table:array[0..65535] of board_t; row_right_table:array[0..65535] of board_t; col_up_table:array[0..65535] of board_t; col_down_table:array[0..65535] of board_t; line_heur_score_table:array[0..65535] of single; row_score_table:array[0..65535] of single; procedure init_move_tables; var row:LongWord; line:array[0..3] of LongWord; result:row_t; i,j:integer; begin fillchar(row_left_table,0,sizeof(row_left_table)); fillchar(row_right_table,0,sizeof(row_right_table)); fillchar(col_up_table,0,sizeof(col_up_table)); fillchar(col_down_table,0,sizeof(col_down_table)); for row:= 0 to 65535 do begin line[0]:=row and $f; line[1]:=(row shr 4) and $f; line[2]:=(row shr 8) and $f; line[3]:=(row shr 12) and $f; i:=0; while (i<3) do begin j:=i+1; //for j := i+1 to 3 do while (j<4) do begin if line[j]<>0 then break; inc(j); end; if (j=4) then break; if (line[i]=0) then begin line[i]:=line[j]; line[j]:=0; dec(i); end else if (line[i]=line[j]) and (line[i]<>$f) then begin inc(line[i]); line[j]:=0; end; inc(i); end; result:=(line[0]) or (line[1] shl 4) or (line[2] shl 8) or (line[3] shl 12); row_left_table[row]:=row xor result; row_right_table[reverse_row(row)]:=reverse_row(row) xor reverse_row(result); col_up_table[row] := unpack_col(row) xor unpack_col(result); col_down_table[reverse_row(row)]:=unpack_col(reverse_row(row)) xor unpack_col(reverse_row(result)); end; end; function execute_move_0(var board:board_t):board_t; var tmp:board_t; ret:board_t; begin ret:=board; tmp:=col_up_table[pack_col((board shr (4*0))and $000F000F000F000F)]; ret:=ret xor (tmp shl (4*0)); tmp:=col_up_table[pack_col((board shr (4*1))and $000F000F000F000F)]; ret:=ret xor (tmp shl (4*1)); tmp:=col_up_table[pack_col((board shr (4*2))and $000F000F000F000F)]; ret:=ret xor (tmp shl (4*2)); tmp:=col_up_table[pack_col((board shr (4*3))and $000F000F000F000F)]; ret:=ret xor (tmp shl (4*3)); Result:=ret; end; function execute_move_1(var board:board_t):board_t; var tmp:board_t; ret:board_t; begin ret:=board; tmp:=col_down_table[pack_col((board shr (4*0))and $000F000F000F000F)]; ret:=ret xor (tmp shl (4*0)); tmp:=col_down_table[pack_col((board shr (4*1))and $000F000F000F000F)]; ret:=ret xor (tmp shl (4*1)); tmp:=col_down_table[pack_col((board shr (4*2))and $000F000F000F000F)]; ret:=ret xor (tmp shl (4*2)); tmp:=col_down_table[pack_col((board shr (4*3))and $000F000F000F000F)]; ret:=ret xor (tmp shl (4*3)); Result:=ret; end; function execute_move_2(var board:board_t):board_t; var tmp:board_t; ret:board_t; begin ret:=board; tmp:=row_left_table[board shr (16*0) and $FFFF]; ret:=ret xor (tmp shl (16*0)); tmp:=row_left_table[board shr (16*1) and $FFFF]; ret:=ret xor (tmp shl (16*1)); tmp:=row_left_table[board shr (16*2) and $FFFF]; ret:=ret xor (tmp shl (16*2)); tmp:=row_left_table[board shr (16*3) and $FFFF]; ret:=ret xor (tmp shl (16*3)); Result:=ret; end; function execute_move_3(var board:board_t):board_t; var tmp:board_t; ret:board_t; begin ret:=board; tmp:=row_right_table[board shr (16*0) and $FFFF]; ret:=ret xor (tmp shl (16*0)); tmp:=row_right_table[board shr (16*1) and $FFFF]; ret:=ret xor (tmp shl (16*1)); tmp:=row_right_table[board shr (16*2) and $FFFF]; ret:=ret xor (tmp shl (16*2)); tmp:=row_right_table[board shr (16*3) and $FFFF]; ret:=ret xor (tmp shl (16*3)); Result:=ret; end; function pack_col(col:board_t):row_t; begin Result:=col or (col shr 12) or (col shr 24) or (col shr 36); end; function unpack_col(row:row_t):board_t; var tmp:board_t; begin tmp:=row; Result:=(tmp or (tmp shl 12) or (tmp shl 24) or (tmp shl 36)) and COL_MASK; end; procedure print_board(board:board_t); var i:integer; begin for i := 0 to 3 do begin writeln(format('%4d %4d %4d %4d', [b[board and $f], b[board shr 4 and $f], b[board shr 8 and $f], b[board shr 12 and $f]])); board:=board shr 16; end; end; function reverse_row(row:row_t):row_t; begin Result:=((row shr 12) or ((row shr 4) and $F0) or ((row shl 4) and $0F00) or (row shl 12)); end; function execute_move(move:integer;board:board_t):Board_t; begin case move of 0://up begin Result:=execute_move_0(board); end; 1://down begin Result:=execute_move_1(Board); end; 2://left begin Result:=execute_move_2(board); end; 3://right begin Result:=execute_move_3(Board); end; else Result:=0; end; end; function get_max_rank(board:board_t):integer; var maxrank,k:integer; begin maxrank:=0; while board<>0 do begin k:=board and $f; if k>maxrank then maxrank:=k; board:=board shr 4; end; Result:=maxrank; end; procedure init_score_tables; var row:Longword; i,maxi:integer; heur_score:single; score:single; line:array[0..3] of LongWord; rank,maxrank:LongWord; begin fillchar(line_heur_score_table,0,sizeof(line_heur_score_table)); fillchar(row_score_table,0,sizeof(row_score_table)); for row:= 0 to 65535 do begin line[0]:=row and $f; line[1]:=(row shr 4) and $f; line[2]:=(row shr 8) and $f; line[3]:=(row shr 12) and $f; heur_score:=0; score:=0; for i := 0 to 3 do begin rank:=line[i]; if (rank=0) then begin heur_score:=heur_score+10000; end else if (rank>=2) then begin score:=score+(rank-1)*power(2,rank); end; end; maxi:=0; maxrank:=0; for i := 0 to 3 do begin rank:=line[i]; if (rank>maxrank) then begin maxrank:=rank; maxi:=i; end; end; if (maxi=0) or (maxi=3) then heur_score:=heur_score+20000; for i := 1 to 3 do if (line[i]=line[i-1]+1) or (line[i]=line[i-1]-1) then heur_score:=heur_score+1000; if (line[0]<line[1]) and (line[1]<line[2]) and (line[2]<line[3]) then heur_score:=heur_score+10000; if (line[0]>line[1]) and (line[1]>line[2]) and (line[2]>line[3]) then heur_score:=heur_score+10000; row_score_table[row]:=score; line_heur_score_table[row]:=heur_score; end; end; { function score_board(board:board_t;tbl):single; begin Result:=(tbl[board and row_mask])+ (tbl[board shr 16 and row_mask])+ (tbl[board shr 32 and row_mask])+ (tbl[board shr 48 and row_mask]); end; function score_col_board(board:board_t;tbl):single; begin Result:=(tbl[pack_col[board and col_mask]])+ (tbl[pack_col[board shr 4 and col_mask])+ (tbl[pack_col[board shr 8 and col_mask])+ (tbl[pack_col[board shr 12 and col_mask]); end; } function score_heur_board(board:board_t):single; begin Result:=(line_heur_score_table[board and row_mask])+ (line_heur_score_table[board shr 16 and row_mask])+ (line_heur_score_table[board shr 32 and row_mask])+ (line_heur_score_table[board shr 48 and row_mask])+ (line_heur_score_table[pack_col(board and col_mask)])+ (line_heur_score_table[pack_col(board shr 4 and col_mask)])+ (line_heur_score_table[pack_col(board shr 8 and col_mask)])+ (line_heur_score_table[pack_col(board shr 12 and col_mask)])+100000; end; function score_board(board:board_t):single; begin result:=(row_score_table[board and row_mask])+ (row_score_table[(board shr 16) and row_mask] )+ (row_score_table[(board shr 32) and row_mask])+ (row_score_table[(board shr 48) and row_mask]); end; function score_tilechoose_node(var state:T_eval_state; board:board_t; cprob:single):single; var res:single; num_open,i:integer; begin res:=0; num_open:=0; for i:=0 to 15 do begin //tmp:=board shr (4*i); if (board shr (4*i) and $f)=0 then inc(num_open); end; cprob:=cprob /num_open; for i := 0 to 15 do begin if ((board shr (4*i)) and $f =0) then begin res:=res+score_move_node(state,board or (Board_t (1) shl (4*i)),cprob *0.9) *0.9; res:=res+score_move_node(state,board or (Board_t (2) shl (4*i)),cprob *0.1) *0.1; end; end; Result:=Res / num_open; end; function score_move_node(Var state:t_eval_state;board:board_t; cprob:single):single; var move:integer; best:single; res:single; newboard:board_t; //i:IsingleMapIterator; begin if (cprob<state.cprob_thresh) or (state.curdepth >= SEARCH_DEPTH_LIMIT) then begin if (state.curdepth > state.curdepth) then begin state.maxdepth:=state.curdepth; end; Result:=score_heur_board(board); exit; end; { if (state.curdepth < CACHE_DEPTH_LIMIT) then begin // i:=state.trans_table.Find(board); if state.trans_table.Count(board)>0 then begin inc(state.cachehits,1); Result:=state.trans_table.Find(board).Value; exit; end; end; } best:=0; inc(state.curdepth); for move := 0 to 3 do begin newboard:=execute_move(move,board); inc(state.moves_evaled); if (board = newboard) then continue; res:=score_tilechoose_node(state,newboard,cprob); if res > best then best:=res; end; dec(state.curdepth); {if state.curdepth < CACHE_DEPTH_LIMIT then begin end; //state.trans_table[board]:=best;} result:=best; end; function t_score_toplevel_move(var state:t_eval_state;board:board_t;move:integer):single; var newboard:board_t; begin newboard:=execute_move(move,board); if board=newboard then begin Result:=0; exit; end; state.cprob_thresh:=CPROB_THRESH_BASE; Result:=score_tilechoose_node(state,newboard,1)+1e-6; end; function score_toplevel_move(board:board_t; move:integer):single; var res:single; start,finish:DWORD; state:t_eval_state; begin state.cprob_thresh:=0; state.maxdepth:=0; state.curdepth:=0; state.cachehits:=0; state.moves_evaled:=0; start:=GetTickCount; res:=t_score_toplevel_move(state,board,move); finish:=GetTickCount; writeln(format('Move %d: result %f: eval %d moves ', [move,res,state.moves_evaled])+'in '+IntToStr(Finish-Start)+' ms'); result:=res; end; function find_best_move(board:board_t):integer; var move:integer; best:single; bestmove:integer; res:single; begin best:=0; bestmove:=-1; print_board(board); writeln(format('Current scores: heur %.0f, actual %.0f',[score_heur_board(board),score_board(board)])); for move := 0 to 3 do begin res:=score_toplevel_move(board,move); if (res>best) then begin best:=res; bestmove:=move; end; end; Result:=bestmove; end; function draw_tile:integer; begin if Random(9)=0 then Result:=2 else Result:=1; end; function insert_tile_rand(board:board_t;tile:integer):board_t; var num_open:integer; i,index:integer; tmp:board_t; begin num_open:=0; for i := 0 to 15 do if (board shr (4*i) and $f)=0 then begin inc(num_open) end; if num_open =0 then begin writeln('insert_tile_rand:no open spots!'); Result:= board; exit; end; index:=random(num_open); for i := 0 to 15 do begin if ((board shr (4*i))and $f)<>0 then continue; if (index=0) then begin tmp:=tile; board:=board or (tmp shl (4 *i)); break; end; dec(index); end; Result:=board; end; function initial_board:board_t; var board:board_t; i:integer; begin randomize; board:=0; for i := 0 to 1 do board:=insert_tile_rand(board,draw_tile); Result:=board; end; procedure play_game(get_move:get_move_func_t); var board:board_t; moveno:integer; scorepenalty:integer; move:integer; newboard:board_t; tile:integer; begin board:=initial_board; moveno:=0; scorepenalty:=0; while True do begin move:=0; while move<4 do begin if execute_move(move,board)<>board then break; inc(move); end; if move=4 then break; inc(moveno); writeln(format('Move %d , current score=%.0f, %d',[moveno,score_board(board)-scorepenalty,scorepenalty])); move:=get_move(board); if (move < 0) then break; newboard:=execute_move(move,board); if (newboard=board) then begin writeln('Illegal move!'); dec(moveno); continue; end; tile:=draw_tile; if tile=2 then inc(scorepenalty,4); board:=insert_tile_rand(newboard,tile); writeln; end; print_board(board); writeln(Format('Game Over. Your score is %.0f. The hightest rank you achieved was %d.',[score_board(board)-scorepenalty,get_max_rank(board)])); end; //Main begin init_move_tables; init_score_tables; play_game (find_best_move); readln; end.
目前上面这个程序可以达到 1024 (超长发挥可以达到2048)
后面会进行的改进:
1.使用整数代替程序中的single进行局面分数的评估。
2.实现nneonneo的剪枝算法,计划使用HouSisong大牛的DGL泛型库 http://blog.csdn.net/housisong/article/category/152693
2014/4/26 修正了盘面分数的问题:
修改之前
res:=res+score_move_node(state,board or 1 shl (4*i),cprob *0.9) *0.9;
修改之后
res:=res+score_move_node(state,board or (Board_t (1) shl (4*i)),cprob *0.9) *0.9;