Marco Web Center

[an error occurred while processing this directive]

Home: Code Repository: Mastering Delphi 6

Chapter 22 - Project WebSearcher

Project Structure

WebSearcher.dpr
program WebSearcher;

{$APPTYPE CONSOLE}

uses
  WebBroker,
  CGIApp,
  WebSearch in 'WebSearch.pas' {WebModule1: TWebModule};

{$R *.RES}

begin
  Application.Initialize;
  Application.CreateForm(TWebModule1, WebModule1);
  Application.Run;
end.
WebSearch.pas
unit WebSearch;

interface

uses
  Windows, Messages, SysUtils, Classes, HTTPApp, HTTPProd, Db, DBClient, DBWeb,
  IdBaseComponent, IdComponent, IdTCPConnection, IdTCPClient, IdHTTP;

type
  TWebModule1 = class(TWebModule)
    DataSetTableProducer1: TDataSetTableProducer;
    cds: TClientDataSet;
    IdHTTP1: TIdHTTP;
    procedure WebModule1WebActionItem1Action(Sender: TObject;
      Request: TWebRequest; Response: TWebResponse; var Handled: Boolean);
    procedure WebModule1WebActionItem2Action(Sender: TObject;
      Request: TWebRequest; Response: TWebResponse; var Handled: Boolean);
    procedure DataSetTableProducer1FormatCell(Sender: TObject; CellRow,
      CellColumn: Integer; var BgColor: THTMLBgColor;
      var Align: THTMLAlign; var VAlign: THTMLVAlign; var CustomAttrs,
      CellData: String);
  private
    { Private declarations }
  public
    strRead: string;
    procedure HtmlStringToCds;
    procedure GrabHtml (strurl: string);

  end;

var
  WebModule1: TWebModule1;

implementation

{$R *.DFM}

const
  strSearch = 'http://www.google.com/search?as_q=borland+delphi&num=100';

procedure TWebModule1.GrabHtml (strurl: string);
var
  Http1: TIdHTTP;
begin
  Http1 := TIdHTTP.Create (nil);
  try
    strRead := Http1.Get (StrUrl);
  finally
    Http1.Free;
  end;
end;

procedure TWebModule1.HtmlStringToCds;
var
  strAddr, strText: string;
  nText: integer;
  nBegin, nEnd: Integer;
begin
  strRead := LowerCase (strRead);
  repeat
    // find the initial part HTTP reference
    nBegin := Pos ('href=http', strRead);
    if nBegin <> 0 then
    begin
      // get the remaining part of the string, starting with 'http'
      strRead := Copy (strRead, nBegin + 5, 1000000);
      // find the end of the HTTP reference
      nEnd := Pos ('>', strRead);
      strAddr := Copy (strRead, 1, nEnd - 1);
      // move on
      strRead := Copy (strRead, nEnd + 1, 1000000);
      // add the URL if 'google' is not in it
      if Pos ('google', strAddr) = 0 then
      begin
        nText := Pos ('</a>', strRead);
        strText := copy (strRead, 1, nText - 1);

        // remove cached references and duplicates
        if (Pos ('cached', strText) = 0) and not cds.Locate ('Address', strAddr, []) then
          cds.InsertRecord ([0, strAddr, strText]);
      end;
    end;
  until nBegin = 0;
end;

procedure TWebModule1.WebModule1WebActionItem1Action(Sender: TObject;
  Request: TWebRequest; Response: TWebResponse; var Handled: Boolean);
var
  I: integer;
begin
  if not cds.Active then
    cds.CreateDataSet
  else
    cds.EmptyDataSet;

  for i := 0 to 5 do // how many pages?
  begin
    // get the data form the search site
    GrabHtml (strSearch + '&start=' + IntToStr (i*100));
    // scan it to fill the cds
    HtmlStringToCds;
  end;

  cds.First;
  // return producer content
  Response.Content := DataSetTableProducer1.Content;
end;

procedure TWebModule1.WebModule1WebActionItem2Action(Sender: TObject;
  Request: TWebRequest; Response: TWebResponse; var Handled: Boolean);
begin
  GrabHtml (strSearch);
  Response.Content := strRead;
end;

function SplitLong(str: string): string;
begin
  // add spaces after / but not at the beginning, and after &
  str := Copy (str, 1, 20) + StringReplace (
      Copy (str, 21, 1000), '/', '/ ', [rfReplaceAll]);
  Result := StringReplace (str, '&', '& ', [rfReplaceAll])
end;

procedure TWebModule1.DataSetTableProducer1FormatCell(Sender: TObject;
  CellRow, CellColumn: Integer; var BgColor: THTMLBgColor;
  var Align: THTMLAlign; var VAlign: THTMLVAlign; var CustomAttrs,
  CellData: String);
begin
  if CellRow <> 0 then
  case CellColumn of
    0: CellData := IntToStr (CellRow);
    1: CellData := '<a href="' +  CellData + '">' + SplitLong(CellData) + '</a>';
    2: CellData := SplitLong (CellData);
  end;
end;

end.
WebSearch.dfm
object WebModule1: TWebModule1
  OldCreateOrder = False
  Actions = <
    item
      Default = True
      Name = 'WebActionItem1'
      PathInfo = '/sites'
      OnAction = WebModule1WebActionItem1Action
    end
    item
      Name = 'WebActionItem2'
      PathInfo = '/source'
      OnAction = WebModule1WebActionItem2Action
    end>
  Left = 234
  Top = 107
  Height = 373
  Width = 443
  object DataSetTableProducer1: TDataSetTableProducer
    Columns = <
      item
        BgColor = 'Gray'
        Custom = 'width="20"'
        FieldName = 'Index'
      end
      item
        FieldName = 'Address'
      end
      item
        FieldName = 'Description'
      end>
    MaxRows = -1
    DataSet = cds
    TableAttributes.Border = 1
    TableAttributes.CellSpacing = 0
    TableAttributes.CellPadding = 2
    OnFormatCell = DataSetTableProducer1FormatCell
    Left = 88
    Top = 56
  end
  object cds: TClientDataSet
    Aggregates = <>
    FieldDefs = <
      item
        Name = 'Index'
        DataType = ftInteger
      end
      item
        Name = 'Address'
        DataType = ftString
        Size = 200
      end
      item
        Name = 'Description'
        DataType = ftString
        Size = 200
      end>
    IndexDefs = <
      item
        Name = 'cdsIndexAddress'
        Fields = 'Address'
      end>
    IndexFieldNames = 'Address'
    Params = <>
    StoreDefs = True
    Left = 176
    Top = 56
  end
  object IdHTTP1: TIdHTTP
    Request.Accept = 'text/html, */*'
    Request.ContentLength = 0
    Request.ContentRangeEnd = 0
    Request.ContentRangeStart = 0
    Request.ProxyPort = 0
    Request.UserAgent = 'Mozilla/3.0 (compatible; Indy Library)'
    Left = 264
    Top = 64
  end
end