简单说“分词”就是指将一个句子拆分为一个个词语。调用的是“海量分词”的DLL库。
海量分词官方网站 http://www.hylanda.com/ (没看到有下载的地方)
试用装下载 http://www.onlinedown.net/soft/39759.htm
例子程序实际来自 https://code.google.com/p/jamessrc/source/checkout
这里我将上面的例子修改为动态调用。其中的 HLSSplit.dat 是DLL自带的分词库,在使用时需要通过 HLSplitInit 来指定这个库的路径。
主要代码例子如下
unit Unit1;
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls;
type
// One segmentation result entry. Button1Click obtains a pointer to this
// record from HLGetWordAt and reads s_szWord and s_dwPOS from it.
// The field order and sizes must mirror the DLL's native struct.
SHLSegWord = record
  // Word text. Declared as PAnsiChar (not PChar) so the field stays a
  // single-byte C string on Unicode Delphi versions where PChar is
  // PWideChar; on Delphi 7 and earlier PChar and PAnsiChar are the same type.
  // NOTE(review): the code page of the text (presumably the system ANSI
  // code page, e.g. GBK) is not stated here - confirm against the SDK.
  s_szWord: PAnsiChar;
  // Part-of-speech flags; tested against the NATURE_* bit masks.
  s_dwPOS: longint;
  // Keyword weight; 0 when the word is not a keyword.
  s_fWeight: single;
end;
// Demo form for the word segmenter. Button1Click reads the text to segment
// from Memo1, writes the tagged result to Memo2 and shows the word count in
// Label1.
TForm1 = class(TForm)
Memo1: TMemo; // input: text to be segmented
Button1: TButton; // presumably wired to Button1Click in the .dfm - confirm
Memo2: TMemo; // output: space-separated words with part-of-speech suffixes
Label1: TLabel; // output: number of words returned by the segmenter
// Runs one segmentation pass over Memo1.Text using HLSSplit.dll.
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;
// Procedural types for the HLSSplit.dll exports that Button1Click resolves
// at run time with GetProcAddress.

// Initialises the segmenter; the argument is the directory holding the
// dictionary file (an empty string means the current directory).
// PAnsiChar keeps the parameter a single-byte C string on Unicode Delphi;
// on Delphi 7 and earlier it is the same type as PChar.
THLSplitInit = function (lpszDataFilePath: PAnsiChar): boolean; cdecl;
// Unloads the segmenter dictionary. Declared as a procedure: a C DLL cannot
// produce a Delphi managed string, and every call site discards the result.
THLFreeSplit = procedure; cdecl;
// Opens a segmentation handle; callers compare it with INVALID_HANDLE_VALUE.
THLOpenSplit = function: THANDLE; cdecl;
// Closes a handle obtained from THLOpenSplit.
THLCloseSplit = function (hHandle: THANDLE): boolean; cdecl;
// Segments the text b; iExtraCalcFlag is a combination of HL_CAL_OPT_* bits.
// NOTE(review): this is the only export declared stdcall while all others
// are cdecl - verify the calling convention against the SDK header before
// changing it either way.
THLSplitWord = function (hHandle: THANDLE; b: LPCSTR; iExtraCalcFlag: integer = 0): boolean; stdcall;
// Number of words produced by the last THLSplitWord call on this handle.
THLGetWordCnt = function (hHandle: THANDLE): integer; cdecl;
// Pointer to the iIndex-th (0-based) result; callers treat it as ^SHLSegWord.
THLGetWordAt = function (hHandle: THANDLE; iIndex: integer): Pointer; cdecl;
var
// Unit-level reference to the demo form.
// NOTE(review): presumably assigned by Application.CreateForm in the project
// file - confirm; nothing in this unit assigns it.
Form1: TForm1;
const
  // Extra-calculation flags passed to HLSplitWord (iExtraCalcFlag).
  HL_CAL_OPT_KEYWORD: Integer = $1; // compute keywords
  HL_CAL_OPT_FINGER:  Integer = $2; // compute the semantic fingerprint of the text
  HL_CAL_OPT_POS:     Integer = $4; // compute part-of-speech tags
  HL_CAL_OPT_SEARCH:  Integer = $8; // emit search-oriented segmentation results

  // Part-of-speech bit masks found in SHLSegWord.s_dwPOS.
  NATURE_D_A:  Integer = $40000000; // adjective / adjectival morpheme
  NATURE_D_B:  Integer = $20000000; // distinguishing word / morpheme
  NATURE_D_C:  Integer = $10000000; // conjunction / conjunctive morpheme
  NATURE_D_D:  Integer = $08000000; // adverb / adverbial morpheme
  NATURE_D_E:  Integer = $04000000; // interjection / interjection morpheme
  NATURE_D_F:  Integer = $02000000; // locality word / locality morpheme
  NATURE_D_I:  Integer = $01000000; // set phrase (chengyu)
  NATURE_D_L:  Integer = $00800000; // idiomatic expression
  NATURE_A_M:  Integer = $00400000; // numeral / numeral morpheme
  NATURE_D_MQ: Integer = $00200000; // numeral-classifier compound
  NATURE_D_N:  Integer = $00100000; // noun / nominal morpheme
  NATURE_D_O:  Integer = $00080000; // onomatopoeia
  NATURE_D_P:  Integer = $00040000; // preposition
  NATURE_A_Q:  Integer = $00020000; // measure word / measure morpheme
  NATURE_D_R:  Integer = $00010000; // pronoun / pronominal morpheme
  NATURE_D_S:  Integer = $00008000; // place word
  NATURE_D_T:  Integer = $00004000; // time word
  NATURE_D_U:  Integer = $00002000; // particle / particle morpheme
  NATURE_D_V:  Integer = $00001000; // verb / verbal morpheme
  NATURE_D_W:  Integer = $00000800; // punctuation
  NATURE_D_X:  Integer = $00000400; // non-morpheme character
  NATURE_D_Y:  Integer = $00000200; // modal particle / modal morpheme
  NATURE_D_Z:  Integer = $00000100; // status word
  NATURE_A_NR: Integer = $00000080; // person name
  NATURE_A_NS: Integer = $00000040; // place name
  NATURE_A_NT: Integer = $00000020; // organisation
  NATURE_A_NX: Integer = $00000010; // foreign-language characters
  NATURE_A_NZ: Integer = $00000008; // other proper noun
  NATURE_D_H:  Integer = $00000004; // prefix component
  NATURE_D_K:  Integer = $00000002; // suffix component
implementation
{$R *.dfm}
// Returns strWord with a part-of-speech suffix ('/a', '/n', '/nr', ...)
// appended. dwPos is tested against the NATURE_* masks in a fixed priority
// order and only the first matching mask contributes a suffix; when none
// matches the suffix is '/?'.
function AddNatureString(strWord:string; dwPos:longint):string;

  // True when every bit of mask is set in dwPos.
  function HasFlag(mask: Integer): Boolean;
  begin
    Result := (dwPos and mask) = mask;
  end;

var
  suffix: string;
begin
  if HasFlag(NATURE_D_A) then suffix := '/a'        // adjective
  else if HasFlag(NATURE_D_B) then suffix := '/b'   // distinguishing word
  else if HasFlag(NATURE_D_C) then suffix := '/c'   // conjunction
  else if HasFlag(NATURE_D_D) then suffix := '/d'   // adverb
  else if HasFlag(NATURE_D_E) then suffix := '/e'   // interjection
  else if HasFlag(NATURE_D_F) then suffix := '/f'   // locality word
  else if HasFlag(NATURE_D_I) then suffix := '/i'   // set phrase
  else if HasFlag(NATURE_D_L) then suffix := '/l'   // idiomatic expression
  else if HasFlag(NATURE_A_M) then suffix := '/m'   // numeral
  else if HasFlag(NATURE_D_MQ) then suffix := '/mq' // numeral-classifier compound
  else if HasFlag(NATURE_D_N) then suffix := '/n'   // noun
  else if HasFlag(NATURE_D_O) then suffix := '/o'   // onomatopoeia
  else if HasFlag(NATURE_D_P) then suffix := '/p'   // preposition
  else if HasFlag(NATURE_A_Q) then suffix := '/q'   // measure word
  else if HasFlag(NATURE_D_R) then suffix := '/r'   // pronoun
  else if HasFlag(NATURE_D_S) then suffix := '/s'   // place word
  else if HasFlag(NATURE_D_T) then suffix := '/t'   // time word
  else if HasFlag(NATURE_D_U) then suffix := '/u'   // particle
  else if HasFlag(NATURE_D_V) then suffix := '/v'   // verb
  else if HasFlag(NATURE_D_W) then suffix := '/w'   // punctuation
  else if HasFlag(NATURE_D_X) then suffix := '/x'   // non-morpheme character
  else if HasFlag(NATURE_D_Y) then suffix := '/y'   // modal particle
  else if HasFlag(NATURE_D_Z) then suffix := '/z'   // status word
  else if HasFlag(NATURE_A_NR) then suffix := '/nr' // person name
  else if HasFlag(NATURE_A_NS) then suffix := '/ns' // place name
  else if HasFlag(NATURE_A_NT) then suffix := '/nt' // organisation
  else if HasFlag(NATURE_A_NX) then suffix := '/nx' // foreign-language characters
  else if HasFlag(NATURE_A_NZ) then suffix := '/nz' // other proper noun
  else if HasFlag(NATURE_D_H) then suffix := '/h'   // prefix component
  else if HasFlag(NATURE_D_K) then suffix := '/k'   // suffix component
  else suffix := '/?';                              // unknown part of speech

  Result := strWord + suffix;
end;
// Click handler: segments Memo1.Text with HLSSplit.dll and writes the
// space-separated words, each followed by its part-of-speech suffix, to
// Memo2. Label1 receives the number of words reported by the DLL.
//
// The DLL is loaded and its entry points resolved on every click. All
// acquired resources (library handle, dictionary, segmentation handle) are
// released through try/finally, including on the early-exit error paths.
procedure TForm1.Button1Click(Sender: TObject);
var
  DllHandle, hHandle: THandle;
  HLSplitInit: THLSplitInit;
  HLFreeSplit: THLFreeSplit;
  HLOpenSplit: THLOpenSplit;
  HLCloseSplit: THLCloseSplit;
  HLSplitWord: THLSplitWord;
  HLGetWordCnt: THLGetWordCnt;
  HLGetWordAt: THLGetWordAt;
  srcText: AnsiString;
  pWord: ^SHLSegWord;
  pText: PAnsiChar;
  i, iExtraCalcFlag, nResultCnt: Integer;
  strResult, strWord: string;
begin
  // A plain literal matches LoadLibrary's PChar parameter on both ANSI and
  // Unicode Delphi versions; an explicit PAnsiChar cast only fits the former.
  DllHandle := LoadLibrary('HLSSplit.dll');
  if DllHandle = 0 then
  begin
    ShowMessage('Loading DLL Error!');
    Exit;
  end;

  try
    @HLSplitInit := GetProcAddress(DllHandle, 'HLSplitInit');
    @HLFreeSplit := GetProcAddress(DllHandle, 'HLFreeSplit');
    @HLOpenSplit := GetProcAddress(DllHandle, 'HLOpenSplit');
    @HLCloseSplit := GetProcAddress(DllHandle, 'HLCloseSplit');
    @HLSplitWord := GetProcAddress(DllHandle, 'HLSplitWord');
    @HLGetWordCnt := GetProcAddress(DllHandle, 'HLGetWordCnt');
    @HLGetWordAt := GetProcAddress(DllHandle, 'HLGetWordAt');

    // HLFreeSplit is checked too: it is called unconditionally below.
    if not (Assigned(HLSplitInit) and Assigned(HLFreeSplit) and
            Assigned(HLOpenSplit) and Assigned(HLCloseSplit) and
            Assigned(HLSplitWord) and Assigned(HLGetWordCnt) and
            Assigned(HLGetWordAt)) then
    begin
      ShowMessage('Loading function Error!');
      Exit;
    end;

    // The argument is the dictionary directory; empty means the current one.
    if not HLSplitInit('') then
    begin
      ShowMessage('海量分词初始化失败!');
      Exit;
    end;

    try
      // Create the segmentation handle.
      hHandle := HLOpenSplit();
      if hHandle = INVALID_HANDLE_VALUE then
      begin
        ShowMessage('创建分词句柄失败!');
        Exit; // the enclosing finally blocks unload the dictionary and the DLL
      end;

      try
        iExtraCalcFlag := HL_CAL_OPT_POS; // request part-of-speech tagging
        // Keep the ANSI copy in a named local so the pointer handed to the
        // DLL stays valid for the whole call. On Unicode Delphi this also
        // converts the memo text to the system ANSI code page.
        srcText := AnsiString(Memo1.Text);
        strResult := ' ';

        if HLSplitWord(hHandle, PAnsiChar(srcText), iExtraCalcFlag) then
        begin
          nResultCnt := HLGetWordCnt(hHandle);
          Label1.Caption := IntToStr(nResultCnt);
          for i := 0 to nResultCnt - 1 do
          begin
            pWord := HLGetWordAt(hHandle, i);
            if pWord = nil then
              Continue; // defensive: no entry returned for this index
            pText := PAnsiChar(pWord^.s_szWord);
            if (pText = nil) or (pText^ = #0) then
              Continue; // skip missing or empty words
            strWord := string(AnsiString(pText));
            // Bit test rather than equality, so tagging still happens when
            // other HL_CAL_OPT_* flags are combined with HL_CAL_OPT_POS.
            if (iExtraCalcFlag and HL_CAL_OPT_POS) <> 0 then
              strWord := AddNatureString(strWord, pWord^.s_dwPOS);
            // Words are separated by a single space.
            strResult := strResult + strWord + ' ';
          end;
        end
        else
          ShowMessage('分词失败!');
      finally
        HLCloseSplit(hHandle); // close the segmentation handle
      end;
    finally
      HLFreeSplit(); // unload the segmenter dictionary
    end;

    Memo2.Text := strResult;
  finally
    FreeLibrary(DllHandle);
  end;
end;
end.

你好,运行 LoadLibrary(PAnsiChar('HLSSplit.dll')) 时总是返回 0,请问是哪里出错了吗?
这个太久没有搞了,我也不清楚了。
你用 getlasterror 看看?