unit SkyStringTokenizer;
{ String tokenizer: a helper class for splitting and parsing strings.
}
interface
uses
SysUtils, Classes;
type
{ Forward-only iteration contract: hasNext is the query a caller uses
  before asking next for the following string. }
IIterator = interface
  function hasNext: Boolean;
  function next: string;
end;
type
{ Component that splits a line of text into tokens on a set of
  single-character delimiters and hands them out one at a time through
  the IIterator methods hasNext / next. }
TSkyStringTokenizer = class(TComponent, IIterator)
private
  tokens: TStringList;   // split result built by init; freed in Destroy
  index: Integer;        // position of the entry last returned; init sets -1
  data: string;          // the text handed to init
  delimiter: string;     // delimiter characters handed to init

  // Stores the arguments and builds the token list.
  procedure init(dataLine: string; delim: string);
  // True when ch equals one of the entries of sl.
  function CharacterInTokens(ch: string; const sl: TStringList): Boolean;
  // Returns a new list holding each character of delim as its own entry.
  function StringToCharArray(delim: string): TStringList;
  // Returns a new list with the pieces of source between delimiter characters.
  function SplitString(source, ch: string): TStringList;
protected
public
  // Tokenizes dataLine using a single space as the delimiter.
  constructor Create(dataLine: string); overload;
  // Tokenizes dataLine using every character of delim as a delimiter.
  constructor Create(dataLine: string; delim: string); overload;
  destructor Destroy; override;
  function hasNext: Boolean;
  function next: string;
published
end;
// Registers TSkyStringTokenizer under the 'SkyDevKit' page name.
procedure Register;
implementation
{ Registration entry point: passes TSkyStringTokenizer to
  RegisterComponents under the 'SkyDevKit' page name. }
procedure Register;
begin
  RegisterComponents(
    'SkyDevKit',
    [TSkyStringTokenizer]
  );
end;
{ TStringTokenizer }
{ Reports whether next can still deliver a non-empty token.

  Returns True only when some entry after the current position has a
  non-zero length. Empty entries (produced by adjacent delimiters) are
  exactly the ones next skips, so they must not count as "more tokens";
  counting them made a caller fetch an empty token, or run past the end of
  the list, after the last real token.
  Returns False when the token list has not been built. }
function TSkyStringTokenizer.hasNext: boolean;
var
  probe: integer;
begin
  result := false;
  // tokens is assigned only by init, which the two string constructors
  // call; an instance built through the inherited Create(AOwner) has none.
  if tokens = nil then
    exit;
  // Look ahead without moving index, so calling hasNext repeatedly is safe.
  for probe := index + 1 to tokens.Count - 1 do
    if length(tokens[probe]) > 0 then
    begin
      result := true;
      break;
    end;
end;
{ Stores the input line and delimiter set, splits the line into tokens and
  resets the read position to "before the first token".

  dataLine - text to tokenize.
  delim    - delimiter characters; passed unchanged to SplitString. }
procedure TSkyStringTokenizer.init(dataLine, delim: string);
begin
  delimiter := delim;
  data := dataLine;
  // SplitString already returns a newly created list, so no list is
  // created here first (the previous extra TStringList.Create was
  // overwritten immediately and leaked). Any list left from an earlier
  // call is released before it is replaced.
  FreeAndNil(tokens);
  tokens := SplitString(data, delimiter);
  // next increments before reading, so -1 makes the first call read entry 0.
  index := -1;
end;
{ Advances to the following entry and returns it, skipping empty entries.

  Returns the next non-empty token. If only empty entries remain, the last
  (empty) entry is returned.
  Calling this when no entry at all remains still raises the string list's
  index-out-of-bounds error, as before. }
function TSkyStringTokenizer.next: string;
begin
  inc(index);
  result := tokens[index];
  // Advance only while another entry really exists. The former bound
  // (index < tokens.Count) allowed an increment to Count and then indexed
  // past the end whenever the list finished with an empty entry.
  while (length(result) <= 0) and (index < tokens.Count - 1) do
  begin
    inc(index);
    result := tokens[index];
  end;
end;
{ Builds a new string list containing one entry per character of delim,
  in order. The caller receives the list and is responsible for freeing it. }
function TSkyStringTokenizer.StringToCharArray(delim: string): TStringList;
var
  charPos: Integer;
begin
  Result := TStringList.Create;
  charPos := 1;
  while charPos <= Length(delim) do
  begin
    // One-character substring at the current position.
    Result.Append(Copy(delim, charPos, 1));
    Inc(charPos);
  end;
end;
{ Creates an unowned tokenizer (owner = nil) over dataLine, treating every
  character of delim as a delimiter. }
constructor TSkyStringTokenizer.Create(dataLine, delim: string);
begin
  inherited Create(nil);
  init(dataLine, delim);
end;
{ Creates an unowned tokenizer (owner = nil) over dataLine, using a single
  space as the delimiter. }
constructor TSkyStringTokenizer.Create(dataLine: string);
begin
  inherited Create(nil);
  init(dataLine, ' ');
end;
{ Splits source at every character that occurs in ch.

  source - text to split.
  ch     - delimiter characters; an empty string means a single space.

  Returns a newly created list that the caller must free. Each delimiter
  closes the current piece, so adjacent delimiters produce empty entries;
  a trailing piece is added only when it is non-empty. }
function TSkyStringTokenizer.SplitString(source, ch: string): tstringlist;
var
  temp: string;
  sl: TStringList;
  index: integer;
begin
  if ch = '' then
    ch := ' ';
  result := TStringList.Create();
  try
    // StringToCharArray creates the delimiter list itself; creating one
    // here beforehand only leaked it when the variable was reassigned.
    sl := self.StringToCharArray(ch);
    try
      temp := '';
      for index := 1 to length(source) do
      begin
        if CharacterInTokens(source[index], sl) then
        begin
          // Delimiter: emit the piece collected so far (possibly empty).
          result.Add(temp);
          temp := '';
        end
        else
          temp := temp + source[index];
      end;
      if temp <> '' then
        result.Add(temp);
    finally
      // The scratch delimiter list is released on every path.
      sl.Free;
    end;
  except
    // Do not leak the partially built result if anything above raised.
    result.Free;
    raise;
  end;
end;
{ Tells whether ch is equal to any entry of sl.

  ch - string to look for (callers pass a single character).
  sl - list of candidate strings; not modified.

  Returns True on the first exact match, False if no entry matches or the
  list is empty. }
function TSkyStringTokenizer.CharacterInTokens(ch: string;
  const sl: TStringList): boolean;
var
  slot: integer;
begin
  result := false;
  for slot := 0 to sl.Count - 1 do
    if sl[slot] = ch then
    begin
      result := true;
      break;
    end;
end;
{ Releases the token list, then runs the inherited destructor. }
destructor TSkyStringTokenizer.destroy;
begin
  FreeAndNil(tokens);
  inherited destroy;
end;
end.
// The following is the C# version; the original version was not written by me.
using System;
namespace CsharpPats
{
    /// <summary>
    /// Splits a line of text on a set of delimiter characters and hands the
    /// non-empty pieces out one at a time.
    /// </summary>
    public class StringTokenizer {
        private string data, delimiter;
        private string[] tokens;
        private int index;

        /// <summary>Tokenizes <paramref name="dataLine"/> on single spaces.</summary>
        public StringTokenizer(string dataLine) {
            init(dataLine, " ");
        }

        /// <summary>
        /// Tokenizes <paramref name="dataLine"/>, treating every character of
        /// <paramref name="delim"/> as a delimiter.
        /// </summary>
        public StringTokenizer(string dataLine, string delim) {
            init(dataLine, delim);
        }

        // Stores the inputs, splits the line and rewinds to the first entry.
        private void init(string dataLine, string delim) {
            delimiter = delim;
            data = dataLine;
            tokens = data.Split(delimiter.ToCharArray());
            index = 0;
        }

        /// <summary>
        /// Returns true when a non-empty token is still available.
        /// </summary>
        /// <remarks>
        /// Split keeps empty entries for adjacent or trailing delimiters, and
        /// nextElement skips them. Counting them here made callers receive an
        /// empty string as a final "token" (and reported a token for an empty
        /// input line), so only non-empty entries are considered. The read
        /// position is not moved.
        /// </remarks>
        public bool hasMoreElements() {
            for (int probe = index; probe < tokens.Length; probe++) {
                if (tokens[probe].Length > 0)
                    return true;
            }
            return false;
        }

        /// <summary>
        /// Returns the next non-empty token and advances past it. If only
        /// empty entries remain, the last empty entry is returned.
        /// </summary>
        public string nextElement() {
            string s = tokens[index++];
            // Skip empty entries while further entries exist.
            while ((s.Length <= 0) && (index < tokens.Length))
                s = tokens[index++];
            return s;
        }
    }
}