View Issue Details

ID: 0004048 | Project: JEDI VCL | Category: 00 JVCL Components | View Status: public | Last Update: 2007-06-19 08:25
Reporter: ZENsan | Assigned To: obones
Priority: normal | Severity: minor | Reproducibility: always
Status: resolved | Resolution: no change required
Product Version: Daily / GIT
Target Version: (none) | Fixed in Version: (none)
Summary: 0004048: JvStrToHTML
Description: I suggest changing the unit source to the following, so that Unicode (WideString) text is accepted as well. It may need to be wrapped in an IFDEF so it is only compiled where appropriate.
The reason is that text passed through UTF8Encode can be corrupted by StrToHTML.
Additional Information:
type
  // Component exposing text <-> HTML entity conversion on WideString values.
  // The published Text and Html properties are backed by the FValue and FHtml
  // fields respectively; the conversion itself is available through the
  // TextToHtml / HtmlToText methods.
  TJvStrToHtml = class(TJvComponent)
  private
    FHtml: WideString;   // backing field read by the Html property
    FValue: WideString;  // backing field read by the Text property
    procedure SetHtml(const Value: WideString);   // write accessor of Html
    procedure SetValue(const Value: WideString);  // write accessor of Text
  public
    constructor Create(AOwner: TComponent); override;
    // Converts plain text to its HTML-entity-encoded form.
    function TextToHtml(const Text: WideString): WideString;
    // Converts HTML-entity-encoded text back to plain text.
    function HtmlToText(const Text: WideString): WideString;
  published
    property Text: WideString read FValue write SetValue;
    property Html: WideString read FHtml write SetHtml;
  end;

// Replaces every character that has an entry in the unit's conversion table
// by the corresponding HTML entity; other characters are copied unchanged.
function WideStringToHtml(const Value: WideString): WideString;
// Inverse direction: replaces '&...;' entities found in Value by characters.
function HtmlToString(const Value: WideString): WideString;
// Returns the HTML entity for a single character, or the character itself
// when the conversion table has no entry for it.
function CharToHtml(Ch: WideChar): WideString;

// Version-control metadata for this unit; only compiled when the
// UNITVERSIONING symbol is defined. The '$...$' strings are keyword fields
// expanded by the version-control system and must not be edited by hand.
{$IFDEF UNITVERSIONING}
const
  UnitVersioning: TUnitVersionInfo = (
    RCSfile: '$URL: https://jvcl.svn.sourceforge.net/svnroot/jvcl/branches/JVCL3_30_PREPARATION/run/JvStrToHtml.pas $';
    Revision: '$Revision: 10612 $';
    Date: '$Date: 2006-05-19 21:04:09 +0200 (ven., 19 mai 2006) $';
    LogPath: 'JVCL\run'
  );
{$ENDIF UNITVERSIONING}

implementation


type
  // One row of the character <-> HTML entity conversion table.
  TJvHtmlCodeRec = packed record
    Ch: WideChar;     // the plain character
    Html: PWideChar;  // its HTML representation (null-terminated wide string)
  end;

const
  { Character <-> HTML entity conversion table.

    Each row maps one character (Ch) to the named character entity reference
    (Html) that represents it in HTML.

    Non-ASCII characters are written as numeric character constants (#$xxxx)
    rather than as quoted literals, so that the table does not depend on the
    code page / encoding the source file is saved or compiled in.

    NOTE(review): '&' itself has no row here, so it is neither escaped nor
    unescaped by the routines using this table - confirm this is intended.

    References:
      http://www.w3.org/TR/REC-html40/charset.html#h-5.3
      http://www.w3.org/TR/REC-html40/sgml/entities.html#h-24.2.1
      http://www.w3.org/TR/REC-html40/sgml/entities.html#h-24.4.1
  }
  Conversions: array [1..72] of TJvHtmlCodeRec = (
    (Ch: '"'; Html: '&quot;'),
    (Ch: '<'; Html: '&lt;'),
    (Ch: '>'; Html: '&gt;'),
    (Ch: '^'; Html: '&circ;'),
    (Ch: '~'; Html: '&tilde;'),
    (Ch: #$00A3; Html: '&pound;'),
    (Ch: #$00A7; Html: '&sect;'),
    (Ch: #$00B0; Html: '&deg;'),
    (Ch: #$00B2; Html: '&sup2;'),
    (Ch: #$00B3; Html: '&sup3;'),
    (Ch: #$00B5; Html: '&micro;'),
    (Ch: #$00B7; Html: '&middot;'),
    (Ch: #$00BC; Html: '&frac14;'),
    (Ch: #$00BD; Html: '&frac12;'),
    (Ch: #$00BF; Html: '&iquest;'),
    (Ch: #$00C0; Html: '&Agrave;'),
    (Ch: #$00C1; Html: '&Aacute;'),
    (Ch: #$00C2; Html: '&Acirc;'),
    (Ch: #$00C3; Html: '&Atilde;'),
    (Ch: #$00C4; Html: '&Auml;'),
    (Ch: #$00C5; Html: '&Aring;'),
    (Ch: #$00C6; Html: '&AElig;'),
    (Ch: #$00C7; Html: '&Ccedil;'),
    (Ch: #$00C8; Html: '&Egrave;'),
    (Ch: #$00C9; Html: '&Eacute;'),
    (Ch: #$00CA; Html: '&Ecirc;'),
    (Ch: #$00CB; Html: '&Euml;'),
    (Ch: #$00CC; Html: '&Igrave;'),
    (Ch: #$00CD; Html: '&Iacute;'),
    (Ch: #$00CE; Html: '&Icirc;'),
    (Ch: #$00CF; Html: '&Iuml;'),
    (Ch: #$00D1; Html: '&Ntilde;'),
    (Ch: #$00D2; Html: '&Ograve;'),
    (Ch: #$00D3; Html: '&Oacute;'),
    (Ch: #$00D4; Html: '&Ocirc;'),
    (Ch: #$00D5; Html: '&Otilde;'),
    (Ch: #$00D6; Html: '&Ouml;'),
    (Ch: #$00D9; Html: '&Ugrave;'),
    (Ch: #$00DA; Html: '&Uacute;'),
    (Ch: #$00DB; Html: '&Ucirc;'),
    (Ch: #$00DC; Html: '&Uuml;'),
    (Ch: #$00DD; Html: '&Yacute;'),
    (Ch: #$00DF; Html: '&szlig;'),
    (Ch: #$00E1; Html: '&aacute;'),
    (Ch: #$00E0; Html: '&agrave;'),
    (Ch: #$00E2; Html: '&acirc;'),
    (Ch: #$00E3; Html: '&atilde;'),
    (Ch: #$00E4; Html: '&auml;'),
    (Ch: #$00E5; Html: '&aring;'),
    (Ch: #$00E6; Html: '&aelig;'),
    (Ch: #$00E7; Html: '&ccedil;'),
    (Ch: #$00E9; Html: '&eacute;'),
    (Ch: #$00E8; Html: '&egrave;'),
    (Ch: #$00EA; Html: '&ecirc;'),
    (Ch: #$00EB; Html: '&euml;'),
    (Ch: #$00EC; Html: '&igrave;'),
    (Ch: #$00ED; Html: '&iacute;'),
    (Ch: #$00EE; Html: '&icirc;'),
    (Ch: #$00EF; Html: '&iuml;'),
    (Ch: #$00F1; Html: '&ntilde;'),
    (Ch: #$00F2; Html: '&ograve;'),
    (Ch: #$00F3; Html: '&oacute;'),
    (Ch: #$00F4; Html: '&ocirc;'),
    (Ch: #$00F5; Html: '&otilde;'),
    (Ch: #$00F6; Html: '&ouml;'),
    (Ch: #$00F7; Html: '&divide;'),
    (Ch: #$00F9; Html: '&ugrave;'),
    (Ch: #$00FA; Html: '&uacute;'),
    (Ch: #$00FB; Html: '&ucirc;'),
    (Ch: #$00FC; Html: '&uuml;'),
    (Ch: #$00FD; Html: '&yacute;'),
    (Ch: #$00FF; Html: '&yuml;')
    );

// Creates the component and starts with both the HTML and the plain-text
// backing fields empty.
constructor TJvStrToHtml.Create(AOwner: TComponent);
begin
  inherited Create(AOwner);
  // Explicitly reset both backing fields.
  FHtml := '';
  FValue := '';
end;

// Method form of the unit-level HtmlToString routine: returns Text with its
// HTML entities converted back to characters. Does not touch the component's
// fields.
function TJvStrToHtml.HtmlToText(const Text: WideString): WideString;
begin
  Result := HtmlToString(Text);
end;

// Write accessor of the Html property.
// Stores the HTML text as given and refreshes the plain-text counterpart, so
// that reading Html returns what was assigned and Text stays in sync with it.
procedure TJvStrToHtml.SetHtml(const Value: WideString);
begin
  // Html reads FHtml directly, so it has to be stored here; previously only
  // FValue was updated and Html kept returning its old content.
  FHtml := Value;
  FValue := HtmlToText(Value);
end;

// Write accessor of the Text property.
// Stores the plain text as given and refreshes the HTML counterpart, so that
// reading Text returns what was assigned and Html stays in sync with it.
procedure TJvStrToHtml.SetValue(const Value: WideString);
begin
  // Text reads FValue directly, so it has to be stored here; previously only
  // FHtml was updated and Text kept returning its old content.
  FValue := Value;
  FHtml := TextToHtml(Value);
end;

// Method form of the unit-level WideStringToHtml routine: returns Text with
// every convertible character replaced by its HTML entity. Does not touch the
// component's fields.
function TJvStrToHtml.TextToHtml(const Text: WideString): WideString;
begin
  Result := WideStringToHtml(Text);
end;

// Returns Value with every character that has a row in Conversions replaced
// by that row's Html text. Characters without a row are copied unchanged.
//
// The result is built in two passes: the first computes how much longer the
// output is than the input so the result can be allocated once, the second
// fills it in.
function WideStringToHtml(const Value: WideString): WideString;
var
  I, J: Integer;
  Len, AddLen, HtmlLen: Integer;
  P: PWideChar;
  Ch: WideChar;
  Found: Boolean;
begin
  Len := Length(Value);

  // Pass 1: number of extra characters needed (each replaced character
  // already accounts for one character of its replacement).
  AddLen := 0;
  for I := 1 to Len do
  begin
    Ch := Value[I];
    for J := Low(Conversions) to High(Conversions) do
      if Ch = Conversions[J].Ch then
      begin
        Inc(AddLen, Length(Conversions[J].Html) - 1);
        Break;
      end;
  end;

  if AddLen = 0 then
    Result := Value
  else
  begin
    // Pass 2: write directly into the pre-sized result through P.
    SetLength(Result, Len + AddLen);
    P := Pointer(Result);
    for I := 1 to Len do
    begin
      Ch := Value[I];
      // An explicit flag is used instead of overwriting Ch with #0 as a
      // sentinel: with the sentinel, a genuine #0 character in Value was
      // silently dropped and the tail of Result was left unwritten.
      Found := False;
      for J := Low(Conversions) to High(Conversions) do
        if Ch = Conversions[J].Ch then
        begin
          HtmlLen := Length(Conversions[J].Html);
          // Conversions[].Html is a PWideChar, so the size is in WideChars.
          Move(Conversions[J].Html[0], P[0], HtmlLen * SizeOf(WideChar));
          Inc(P, HtmlLen);
          Found := True;
          Break;
        end;
      if not Found then
      begin
        P[0] := Ch;
        Inc(P);
      end;
    end;
  end;
end;

// Returns Value with HTML character references replaced by characters.
//
// A reference starts at '&' and ends at the next ';', which must occur within
// 20 characters of the '&'. It is resolved, in this order, by
//   1. an exact (case-sensitive) match against Conversions[].Html,
//   2. a match of the lower-cased reference against Conversions[].Html
//      (keeps accepting input such as '&QUOT;' or '&AGRAVE;'),
//   3. a numeric reference: '&#NNN;' (decimal) or '&#xHH;' (hexadecimal)
//      with a code in 1..$FFFF.
// Anything that cannot be resolved is copied to the result unchanged.
function HtmlToString(const Value: WideString): WideString;
var
  I, Index, Len: Integer;
  Start, J, Code: Integer;
  Ch: WideChar;
  ReplStr, LowerStr, NumStr: WideString;
begin
  Len := Length(Value);
  SetLength(Result, Len); // worst case: nothing is replaced
  Index := 0;
  I := 1;
  while I <= Len do
  begin
    Ch := Value[I];
    // HTML entity
    if Ch = '&' then
    begin
      Start := I;
      Inc(I);
      while (I <= Len) and (Value[I] <> ';') and (I < Start + 20) do
        Inc(I);
      // I may have run past the end of Value here; test that first so that
      // Value[Len + 1] is never read.
      if (I > Len) or (Value[I] <> ';') then
        I := Start // not a terminated reference: emit the '&' literally
      else
      begin
        Ch := #0;
        ReplStr := Copy(Value, Start, I - Start + 1);

        // 1. Exact match. Entity names are case-sensitive ('&Agrave;' and
        //    '&agrave;' are different characters), so this must come first.
        for J := Low(Conversions) to High(Conversions) do
          if Conversions[J].Html = ReplStr then
          begin
            Ch := Conversions[J].Ch;
            Break;
          end;

        // 2. Fallback for unusually-cased input: compare the lower-cased
        //    reference against the table.
        if Ch = #0 then
        begin
          LowerStr := LowerCase(ReplStr);
          for J := Low(Conversions) to High(Conversions) do
            if Conversions[J].Html = LowerStr then
            begin
              Ch := Conversions[J].Ch;
              Break;
            end;
        end;

        // 3. If no conversion was found, it may actually be a number.
        if Ch = #0 then
        begin
          Code := -1;
          // Shortest numeric reference is '&#N;' (4 characters).
          if (Length(ReplStr) > 3) and (ReplStr[2] = '#') then
          begin
            // Strip the leading '&#' and the trailing ';'.
            NumStr := Copy(ReplStr, 3, Length(ReplStr) - 3);
            // '&#xHH;' -> '$HH', the hexadecimal notation StrToIntDef reads.
            if (NumStr[1] = 'x') or (NumStr[1] = 'X') then
              NumStr := '$' + Copy(NumStr, 2, MaxInt);
            Code := StrToIntDef(NumStr, -1);
          end;

          // Only codes that fit a single WideChar are converted; anything
          // else (including codes above $FFFF) is left as literal text.
          if (Code > 0) and (Code <= $FFFF) then
            Ch := WideChar(Code)
          else
          begin
            I := Start;
            Ch := Value[I];
          end;
        end;
      end;
    end;

    Inc(I);
    Inc(Index);
    Result[Index] := Ch;
  end;
  // Trim the unused part of the worst-case allocation.
  if Index <> Len then
    SetLength(Result, Index);
end;

// Looks Ch up in Conversions and returns the Html text of the first matching
// row; when no row matches, the character itself is returned.
function CharToHtml(Ch: WideChar): WideString;
var
  Idx: Integer;
begin
  // Default for characters that have no row in the table.
  Result := Ch;
  Idx := Low(Conversions);
  while Idx <= High(Conversions) do
  begin
    if Conversions[Idx].Ch = Ch then
    begin
      Result := Conversions[Idx].Html;
      Break;
    end;
    Inc(Idx);
  end;
end;
Tags: No tags attached.

Activities

obones

2007-02-02 06:49

administrator   ~0010693

Please explain in more details and provide a patch file, this makes integration MUCH easier.

2007-02-06 12:39

 

Arioch

2007-02-06 12:40

developer   ~0010699

Please don't do this, or you will corrupt all the Cyrillic letters!
See the attached screenshot.

ZENsan

2007-03-01 08:17

reporter   ~0011250

I think we can close this issue. Most developers, I think, do not use Unicode, and the smaller group that does (like me) can implement this manually. So please close this issue. That's all, folks!

obones

2007-06-19 08:25

administrator   ~0013458

Thanks for letting us know.

Issue History

Date Modified Username Field Change
2007-02-02 04:18 ZENsan New Issue
2007-02-02 06:49 obones Note Added: 0010693
2007-02-02 06:49 obones Status new => feedback
2007-02-06 12:39 Arioch File Added: Delphi_sources_are_not_Latin1.png
2007-02-06 12:40 Arioch Note Added: 0010699
2007-03-01 08:17 ZENsan Note Added: 0011250
2007-06-19 08:25 obones Status feedback => resolved
2007-06-19 08:25 obones Resolution open => no change required
2007-06-19 08:25 obones Assigned To => obones
2007-06-19 08:25 obones Note Added: 0013458