Marco's Web Center

Menu for Development

Site Menu
Object Pascal Handbook
Delphi Handbooks Collection
Mastering Borland Delphi 2005
(Old) White Papers
(Old)Tools
(Old) Conferences

My Other Sites
Italian Site (www.marcocantu.it)
the delphi search

Spirit of delphi

Advertising
Home My Blog Books Object Pascal Marco

Home: Code Repository: Mastering Delphi 6

Chapter 22 - Project WebSearcher

Project Structure

WebSearcher.dpr
program WebSearcher;

{$APPTYPE CONSOLE}

uses
  WebBroker,
  CGIApp,
  WebSearch in 'WebSearch.pas' {WebModule1: TWebModule};

{$R *.RES}

begin
  Application.Initialize;
  Application.CreateForm(TWebModule1, WebModule1);
  Application.Run;
end.
WebSearch.pas
unit WebSearch;

interface

uses
  Windows, Messages, SysUtils, Classes, HTTPApp, HTTPProd, Db, DBClient, DBWeb,
  IdBaseComponent, IdComponent, IdTCPConnection, IdTCPClient, IdHTTP;

type
  TWebModule1 = class(TWebModule)
    DataSetTableProducer1: TDataSetTableProducer;
    cds: TClientDataSet;
    IdHTTP1: TIdHTTP;
    procedure WebModule1WebActionItem1Action(Sender: TObject;
      Request: TWebRequest; Response: TWebResponse; var Handled: Boolean);
    procedure WebModule1WebActionItem2Action(Sender: TObject;
      Request: TWebRequest; Response: TWebResponse; var Handled: Boolean);
    procedure DataSetTableProducer1FormatCell(Sender: TObject; CellRow,
      CellColumn: Integer; var BgColor: THTMLBgColor;
      var Align: THTMLAlign; var VAlign: THTMLVAlign; var CustomAttrs,
      CellData: String);
  private
    { Private declarations }
  public
    strRead: string;
    procedure HtmlStringToCds;
    procedure GrabHtml (strurl: string);

  end;

var
  WebModule1: TWebModule1;

implementation

{$R *.DFM}

const
  strSearch = 'http://www.google.com/search?as_q=borland+delphi&num=100';

procedure TWebModule1.GrabHtml (strurl: string);
var
  Http1: TIdHTTP;
begin
  Http1 := TIdHTTP.Create (nil);
  try
    strRead := Http1.Get (StrUrl);
  finally
    Http1.Free;
  end;
end;

procedure TWebModule1.HtmlStringToCds;
var
  strAddr, strText: string;
  nText: integer;
  nBegin, nEnd: Integer;
begin
  strRead := LowerCase (strRead);
  repeat
    // find the initial part HTTP reference
    nBegin := Pos ('href=http', strRead);
    if nBegin <> 0 then
    begin
      // get the remaining part of the string, starting with 'http'
      strRead := Copy (strRead, nBegin + 5, 1000000);
      // find the end of the HTTP reference
      nEnd := Pos ('>', strRead);
      strAddr := Copy (strRead, 1, nEnd - 1);
      // move on
      strRead := Copy (strRead, nEnd + 1, 1000000);
      // add the URL if 'google' is not in it
      if Pos ('google', strAddr) = 0 then
      begin
        nText := Pos ('</a>', strRead);
        strText := copy (strRead, 1, nText - 1);

        // remove cached references and duplicates
        if (Pos ('cached', strText) = 0) and not cds.Locate ('Address', strAddr, []) then
          cds.InsertRecord ([0, strAddr, strText]);
      end;
    end;
  until nBegin = 0;
end;

procedure TWebModule1.WebModule1WebActionItem1Action(Sender: TObject;
  Request: TWebRequest; Response: TWebResponse; var Handled: Boolean);
var
  I: integer;
begin
  if not cds.Active then
    cds.CreateDataSet
  else
    cds.EmptyDataSet;

  for i := 0 to 5 do // how many pages?
  begin
    // get the data form the search site
    GrabHtml (strSearch + '&start=' + IntToStr (i*100));
    // scan it to fill the cds
    HtmlStringToCds;
  end;

  cds.First;
  // return producer content
  Response.Content := DataSetTableProducer1.Content;
end;

procedure TWebModule1.WebModule1WebActionItem2Action(Sender: TObject;
  Request: TWebRequest; Response: TWebResponse; var Handled: Boolean);
begin
  GrabHtml (strSearch);
  Response.Content := strRead;
end;

function SplitLong(str: string): string;
begin
  // add spaces after / but not at the beginning, and after &
  str := Copy (str, 1, 20) + StringReplace (
      Copy (str, 21, 1000), '/', '/ ', [rfReplaceAll]);
  Result := StringReplace (str, '&', '& ', [rfReplaceAll])
end;

procedure TWebModule1.DataSetTableProducer1FormatCell(Sender: TObject;
  CellRow, CellColumn: Integer; var BgColor: THTMLBgColor;
  var Align: THTMLAlign; var VAlign: THTMLVAlign; var CustomAttrs,
  CellData: String);
begin
  if CellRow <> 0 then
  case CellColumn of
    0: CellData := IntToStr (CellRow);
    1: CellData := '<a href="' +  CellData + '">' + SplitLong(CellData) + '</a>';
    2: CellData := SplitLong (CellData);
  end;
end;

end.
WebSearch.dfm
object WebModule1: TWebModule1
  OldCreateOrder = False
  Actions = <
    item
      Default = True
      Name = 'WebActionItem1'
      PathInfo = '/sites'
      OnAction = WebModule1WebActionItem1Action
    end
    item
      Name = 'WebActionItem2'
      PathInfo = '/source'
      OnAction = WebModule1WebActionItem2Action
    end>
  Left = 234
  Top = 107
  Height = 373
  Width = 443
  object DataSetTableProducer1: TDataSetTableProducer
    Columns = <
      item
        BgColor = 'Gray'
        Custom = 'width="20"'
        FieldName = 'Index'
      end
      item
        FieldName = 'Address'
      end
      item
        FieldName = 'Description'
      end>
    MaxRows = -1
    DataSet = cds
    TableAttributes.Border = 1
    TableAttributes.CellSpacing = 0
    TableAttributes.CellPadding = 2
    OnFormatCell = DataSetTableProducer1FormatCell
    Left = 88
    Top = 56
  end
  object cds: TClientDataSet
    Aggregates = <>
    FieldDefs = <
      item
        Name = 'Index'
        DataType = ftInteger
      end
      item
        Name = 'Address'
        DataType = ftString
        Size = 200
      end
      item
        Name = 'Description'
        DataType = ftString
        Size = 200
      end>
    IndexDefs = <
      item
        Name = 'cdsIndexAddress'
        Fields = 'Address'
      end>
    IndexFieldNames = 'Address'
    Params = <>
    StoreDefs = True
    Left = 176
    Top = 56
  end
  object IdHTTP1: TIdHTTP
    Request.Accept = 'text/html, */*'
    Request.ContentLength = 0
    Request.ContentRangeEnd = 0
    Request.ContentRangeStart = 0
    Request.ProxyPort = 0
    Request.UserAgent = 'Mozilla/3.0 (compatible; Indy Library)'
    Left = 264
    Top = 64
  end
end