当前位置:   article > 正文

几个实用的HTML解析函数分享。_html直接用的函数

html直接用的函数

1)HTML 标签值攫取函数,任意标签哦,纯字符串分析,可以配合IDHTTP编程

uses StrUtils;

function ExtractHtmlTagValues(const HtmlText: string; TagName,AttribName: string; var Values: TStringList): integer;

function FindFirstCharAfterSpace(const Line: string; StartPos:integer): Integer;

var i: integer;

begin

Result := -1;

for i := StartPos to Length(Line) do

begin

if (Line[i] <> ' ') then

begin

Result := i;

exit;

end;

end;

end;

function FindFirstSpaceAfterChars(const Line: string; StartPos:integer): Integer;

begin

Result := PosEx(' ', Line, StartPos);

end;

function FindFirstSpaceBeforeChars(const Line: string; StartPos:integer): Integer;

var i: integer;

begin

Result := 1;

for i := StartPos downto 1 do

begin

if (Line[i] = ' ') then

begin

Result := i;

exit;

end;

end;

end;

var InnerTag: string;

LastPos, LastInnerPos: Integer;

SPos, LPos, RPos: Integer;

AttribValue: string;

ClosingChar: char;

TempAttribName: string;

begin

Result := 0;

LastPos := 1;

while (true) do

begin

// find outer tags '<' &'>'

LPos := PosEx('<', HtmlText, LastPos);

if (LPos <= 0) then break;

RPos := PosEx('>', HtmlText, LPos+1);

if (RPos <= 0) then

LastPos := LPos + 1

else

LastPos := RPos + 1;

// get inner tag

InnerTag := Copy(HtmlText, LPos+1, RPos-LPos-1);

InnerTag := Trim(InnerTag); // remove spaces

if (Length(InnerTag) < Length(TagName)) thencontinue;

// check tag name

if (SameText(Copy(InnerTag, 1, Length(TagName)), TagName))then

begin

// found tag

AttribValue := '';

LastInnerPos := Length(TagName)+1;

while (LastInnerPos < Length(InnerTag)) do

begin

// find first '=' after LastInnerPos

RPos := PosEx('=', InnerTag, LastInnerPos);

if (RPos <= 0) then break;

// this way you can check for multiple attrib names and not aspecific attrib

SPos := FindFirstSpaceBeforeChars(InnerTag, RPos);

TempAttribName := Trim(Copy(InnerTag, SPos, RPos-SPos));

if (true) then

begin

// found correct tag

LPos := FindFirstCharAfterSpace(InnerTag, RPos+1);

if (LPos <= 0) then

begin

LastInnerPos := RPos + 1;

continue;

end;

LPos := FindFirstCharAfterSpace(InnerTag, LPos); // get to firstchar after '='

if (LPos <= 0) then continue;

if ((InnerTag[LPos] <> '"') and(InnerTag[LPos] <> '''')) then

begin

// AttribValue is not between '"' or ''' so get it

RPos := FindFirstSpaceAfterChars(InnerTag, LPos+1);

if (RPos <= 0) then

AttribValue := Copy(InnerTag, LPos, Length(InnerTag)-LPos+1)

else

AttribValue := Copy(InnerTag, LPos, RPos-LPos+1);

end

else

begin

// get url between '"' or '''

ClosingChar := InnerTag[LPos];

RPos := PosEx(ClosingChar, InnerTag, LPos+1);

if (RPos <= 0) then

AttribValue := Copy(InnerTag, LPos+1,Length(InnerTag)-LPos-1)

else

AttribValue := Copy(InnerTag, LPos+1, RPos-LPos-1)

end;

if (SameText(TempAttribName, AttribName)) and (AttribValue<> '') then

begin

Values.Add(AttribValue);

inc(Result);

end;

end;

if (RPos <= 0) then

LastInnerPos := Length(InnerTag)

else

LastInnerPos := RPos+1;

end;

end;

end;

end;

用法示例:

取得页面中所有链接

var

Links : TStringList;

LinkFound,i : Integer;

begin

Links := TStringList.Create;

LinkFound := ExtractHtmlTagValues(HtmlText,'A','HREF',Links);

for i:=0 to LinkFound-1 do

begin

//Add your own codes here

end;

Links.Free;

end;

2)表单元素值攫取函数,可以从HTML文本中按照给定的Input名称解析出其Value

function GetValByName(S, Sub: string) : string;

var

EleS,EleE,iPos: Integer;

ELeStr,ValSt: String;

St,Ct : Integer;

function FindEleRange(str: string ; front : boolean; posi :integer): Integer;

var

i: integer;

begin

if Front then

begin

for i:=posi-1 downto 1 do

if Str[i]='<' then

begin

Result := i;

break;

end;

end else begin

for i := posi+1 to length(Str) do

if Str[i]='>' then

begin

Result := i;

break;

end;

end;

end;

function FindEnd (str : string; posi : integer) : Integer;

var

i: integer;

begin

for i:=posi to length(str) do

begin

if (str[i] ='"') or (str[i] ='''') or (str[i] =' ') then

begin

result := i-1;

break;

end;

end;

end;

begin

iPos := Pos('name="'+lowercase(Sub)+'"',lowercase(S));

if iPos = 0 then iPos :=Pos('name='+lowercase(Sub),lowercase(S));

if iPos = 0 then iPos :=Pos('name='''+lowercase(Sub)+'''',lowercase(S));

if iPos = 0 then exit;

EleS := FindEleRange(S,TRUE,iPos);

EleE := FindEleRange(S,FALSE,iPos);

EleStr := Copy(S,EleS,EleE-EleS+1);

ValSt := 'value="';

iPos := Pos(ValSt,EleStr);

if iPos = 0 then

begin

ValSt := 'value=''';

iPos := Pos(ValSt,EleStr);

end;

if iPos = 0 then

begin

ValSt := 'value=';

iPos := Pos(ValSt,EleStr);

end;

St := iPos+length(ValSt);

Ct := FindEnd(EleStr,St)-St+1;

Result := Copy(EleStr,St,Ct);

end;

用法示例:

取得页面中名为 Submit 的表单项的值

var

InputValue : String;

begin

InputValue := GetValByName(HtmlText,'Submit');

end;

3)取某两个字符串中间的字符

function getStrFromHtml(var Source: String; SbStr, bStr, eStr:String): String;

var

I: Integer;

sbPos, bPos, ePos: Integer;

S: String;

begin

S := Source;

Result := '' ;

if SBStr <> '' then

Begin

sbPos := Pos(UpperCase(SbStr), UpperCase(S));

if sbPos > 0 then

Delete(S, 1, sbPos - 1 + length(sbStr))

Else

Exit;

End;

bPos := Pos(UpperCase(bStr), UpperCase(S));

if bPos > 0 then

Delete(S, 1, bPos - 1 + length(bStr))

Else

Exit;

ePos := pos(UpperCase(eStr), UpperCase(S));

if ePos > 0 then

Delete(S, ePos, length(S));

Result := S;

end;

用法实例:

FUserID := getStrFromHtml(reqStr, 'id="userID"', 'value="','"');

转自:品略网  https://www.pinlue.cn/

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Cpp五条/article/detail/92987
推荐阅读
相关标签
  

闽ICP备14008679号