unit StringFunctions; { [StringFunctions] [6.1] Delphi 2005 January 2007 LICENSE The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at "http://www.mozilla.org/MPL/" Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. The Original Code is "[StringFunctions.pas]". The Initial Developer of the Original Code is Martin Holmes (Victoria, BC, Canada, "http://www.mholmes.com/"). Copyright (C) 2006 Martin Holmes and the University of Victoria Computing and Media Centre. The code was co-developed for university and personal projects, and rights are shared by Martin Holmes and the University of Victoria. All Rights Reserved. } { This is a huge mass of various string and widestring functions created over the years and through various projects. Caveat user -- many are obsolete and only those used in current applications are regularly tested! Dependencies: TntUnicodeControls (Troy Wolbrink) } {$R Charcode.res} interface uses Windows, Forms, SysUtils, Classes, JclUnicode, TntClasses, TntSysUtils, WideStrUtils; const PuncMarks: set of Char = ['!', '"', '(', ')', #91, #93, '{', '}', ',', '.', ';', ':', '<', '>', '?', '‹', '›', '¡', '`', '‚', '„', '…', '‘', '’', '“', '”', '«', '»', '¿']; XMLWhitespace = [WideChar(#32), WideChar(#13), WideChar(#10), WideChar(#9)]; WebExts = WideString('htm||html||asp||aspx||shtml||gif||jpg||png||pdf||cgi||pl||cfm||cfml||shtm||php||php3||xml||xhtml||svg'); function PosEx(const SubStr: string; const S: string; StartIndex: integer): integer; function Webble(InString: string): string; function Webble2(InString: string): string; function GetDecUnicodeNumFromWinNum(WinNum: integer): integer; function UnWebble(InString: string): string; function MacCharToPC(InString: string): string; function PCCharToMac(InString: string): string; function PCCharToHex(InString: string): string; function HexToPCChar(InString: string): string; function PCCharToMacHex(InString: string): string; function PCCharToUnderscoreHex(InString: string): string; function AllCharsToHTMLHex(InString: string): string; function MakeEscapeLookup(InString: string): string; function TrimString(InString: string): string; function RemoveReturns(InString: string): string; function HTMLParas(InString: string): string; function ReturnsToBR(InString: string): string; function StripPunctuation(InString: string): string; function HideReturns(InString: string): string; function RestoreReturns(InString: string): string; function ReplaceStuff(Token, Replacement, InString: string): string; function WReplaceStuff(Token, Replacement, InString: WideString): WideString; function EscapeSingleQuotes(InString: string): string; function EscapeDoubleQuotes(InString: string): string; function EscapeAllQuotes(InString: string): string; function QuotesToPercentEscapes(InString: string): string; function DQuotesToEntity(InString: string): string; function EscapeAngleBrackets(InString: string): string; function MakeNonBreaking(InString: string): string; function GetLastHeadCloseTagLocation(InString: string): integer; function GetLastBodyOpenTagLocation(InString: string): integer; function GetLastBodyCloseTagLocation(InString: string): integer; function IsGoodEMailAddress(InString: string): Boolean; function RemoveDoubleSpaces(InString: string): string; function WRemoveDoubleSpaces(InString: WideString): WideString; function WRemoveReturns(InString: WideString): WideString; function GetTextBetween(InString, StartMarker, EndMarker: string): string; function WGetTextBetween(InString, StartMarker, EndMarker: WideString): WideString; function GetTextBetweenExtra(var OriginalString: string; StartMarker, EndMarker, Replacement: string; IncludeMarkers, Replace: Boolean): string; function ReplaceTextFromTo(InString: string; StartMarker, EndMarker, Replacement: string): string; procedure SplitWords(InString: string; var OutList: TStringList); procedure WideSplitWords(InString: WideString; var OutList: TWideStringList); function RemoveFileExtension(InString: string): string; function SplitString(CharsPerString: integer; InString: string; var OutList: TStringList): Boolean; function SplitStringToLines(LineEndToken: string; InString: string; var OutList: TStringList): Boolean; function SplitStringToLinesEx(LineEndToken: string; InString: string; var OutList: TStringList): Boolean; function MakeJavaScriptString(StringName, InString: string): string; function MakeHexJavaScriptString(StringName, InString: string): string; function MakeJavaScriptArray(ArrayName: string; ArrayItems: TStringList; StartFrom: integer): string; function StripChars(CharsToStrip: string; InString: string): string; function IncludeStuff(InString, Tag: string; IncludeIt: Boolean): string; function InsertMetaTag(WebPage, ProgName, UserName: string): string; function RemoveJSComments(InString: string): string; function RemoveJSComments2(InString: string): string; function SeparateJavaScript(var InString: string; HTMLFileName: string): Boolean; function SeparateFirstJavaScript(var InString: string; HTMLFileName: string): Boolean; function CleanupFileName(InName: string): string;//Removes iffy chars and spaces from file names function TextToAudioFileName(Text: WideString; Extension: WideString): WideString; function AudioFileNameToText(AudioFileName: WideString; Extension: WideString): WideString; function SLIndexOf(SList: TStringList; S: string; CaseSensitive: Boolean): integer; //Unicode/WideString functions function WPosEx(const SubStr: WideString; const S: WideString; StartIndex: integer): integer; //My code function WidePosEx(const SubStr, S: WideString; Offset: Integer = 1): Integer; //Adapted Delphi code function WideStringToANSI(WS: WideString): string; function WEscapeSingleQuotes(InString: Widestring): Widestring; function WEscapeDoubleQuotes(InString: Widestring): Widestring; function WEscapeAllQuotes(InString: Widestring): Widestring; function WMakeCompliantXMLAttribute(InString: WideString; QuoteChar: WideChar): WideString; function WUriAsCompliantXMLAttribute(InString: WideString; QuoteChar: WideChar): WideString; function WUnescapeXMLAttribute(InString: WideString): WideString; function WEscapeAngleBrackets(InString: Widestring): Widestring; function WNormalizeReturns(InString: WideString): WideString; function WideStringToHTMLNumeric(InString: WideString): string; function WideStringToHTMLNumericAbove255(InString: WideString): string; function WideStringToHTMLHex(InString: WideString): string; function WideStringToHTMLOutput(InString: WideString; ProcessRTL: Boolean): string; //This escapes angle brackets that aren't part of tags before conversion function WideStringToHTMLOutputCentredRTL(InString: WideString; ProcessRTL: Boolean): string; //This escapes angle brackets that aren't part of tags before conversion function WideStringToJSUnicode(InString: WideString): string; function WideStringToWideJSUnicode(InString: WideString): WideString; function WideStringToExplanation(InString: WideString): string; function WReturnsToBR(InString: WideString): WideString; function WideStringToJSUnicodeEx(InString: WideString): string; function WideStringToJSUnicodeExNoTags(InString: WideString): string; function WideStringFromHTMLNumeric(InString: string): WideString; function WideStringFromWSHTMLNumeric(InString: WideString): WideString; function HDecimalUCodeToJHexUCode(InString: string): string; function JHexUCodeToHDecimalUCode(InString: string): string; function JHexUCodeToWideString(InString: WideString): WideString; function PCCharToJSUnicode(InString: string): string; function PCCharToJSUnicodeEx(InString: string): string; //Includes all chars, not just those over 127 -- used for simple encryption function PCCharToHTMLNumeric(InString: string): string; //Includes all chars, not just those over 127 -- used for simple encryption function WAllCharsToJSUnicode(InString: WideString): WideString; function WAllCharsToHTMLHex(InString: WideString): WideString; function WTagsToLowerCase(InString: WideString): WideString; function MakeRTLRightAlignedPara(InString: string): string; function FirstNonPuncCharIsRTL(InString: WideString): Boolean; function LastNonPuncCharIsRTL(InString: WideString): Boolean; function HasMoreRTLCharsThanLTRChars(InString: WideString): Boolean; function TextIsMostLikelyRTL(InString: WideString): Boolean; function InsertRTLSpans(InString: WideString): WideString; function ProcessForRTLBoundaries(InString: WideString; Centred: Boolean): WideString; function TextIsMostLikelyCJK(InString: WideString): Boolean; function ParseWideStringToPerceivedChars(var CharList: TWideStringList; WInString: WideString; RemoveAscii: Boolean): Boolean; function ShowReturns(InString: string): string; function UniqueIDFromTime: string; procedure CopyUnicodeTextToClipboard(InText: WideString); function WGetUnicodePunctuation(InString: WideString): string; function WGetUnicodeOpenPunctuation(InString: WideString): string; function WUnicodeIsSpacePunc(InChar: WideChar): Boolean; function WUnicodeIsReturn(InChar: WideChar): Boolean; function WUnicodeIsCJK(InChar: WideChar): Boolean; function WUnicodeIsCombining(InChar: WideChar): Boolean; function WUnicodeChangesDirectionality(RTL: Boolean; InChar: WideChar): Boolean; function BuildWebKeypad(InString: WideString; ProcessForRTL: Boolean): string; function WConvertUnmatchedAngleBrackets(InString: WideString): WideString; function WParseTagOrAttributes(InString: WideString; uslPairs: TTntStringList): Boolean; function WStrIsLikelyURL(InString: WideString): Boolean; //This function checks whether a substring inside a widestring is a whole //word (delimited by punctuation or spaces). function IsWholeWordHit(wsText: WideString; StartPos, EndPos: integer): Boolean; function ParseAppVersionFromWideString(wsInput: WideString; var verMajor: integer; var verMinor: integer; var verBuild: integer; var verRelease: integer; var wsOutput: WideString): Boolean; function FileHasUTF8Header(const FileName: WideString): Boolean; function HasUTF8Header(InString: string): Boolean; function FileMayBeUTF8(FileName: WideString): Boolean; function LoadFileCarefullyToWideString(FileName: WideString): WideString; function GetFileCharSet(FileName: WideString): TTntStreamCharSet; function WTruncWSToMaxLen(InString: WideString; MaxLen: integer): WideString; implementation type StrRec = record allocSiz: Longint; refCnt: Longint; length: Longint; end; const skew = sizeof(StrRec); rOff = sizeof(StrRec)-sizeof(Longint); overHead = sizeof(StrRec)+1; //Pos function with StartIndex parameter added. Uses record and constants above. //Found somewhere on the Web many years ago; now not needed because Delphi //includes one. function PosEx(const SubStr: AnsiString; const S: AnsiString; StartIndex: integer): integer; assembler; asm {->EAX Pointer to substr } { EDX Pointer to string } {<-EAX Position of substr in s or 0 } TEST EAX,EAX JE @@noWork TEST EDX,EDX JE @@stringEmpty OR ECX,ECX {StartIndex = 0 ? return 0 } JZ @@stringEmpty CMP ECX,80000000h JAE @@stringEmpty {StartIndex < 0 ? return 0 } PUSH EBX PUSH ESI PUSH EDI MOV ESI,EAX { Point ESI to substr } MOV EDI,EDX { Point EDI to s } MOV EBX,ECX MOV ECX,[EDI-skew].StrRec.length { ECX = Length(s) } CMP ECX,EBX JB @@badindex { StartIndex > Length(s) ? return 0 } SUB ECX,EBX INC ECX { adjust Length(s) } PUSH EDI { remember s position to calculate index } ADD EDI,EBX DEC EDI { Point EDI to start of s + StartIndex - 1 } MOV EDX,[ESI-skew].StrRec.length { EDX = Length(substr) } DEC EDX { EDX = Length(substr) - 1 } JS @@fail { < 0 ? return 0 } MOV AL,[ESI] { AL = first char of substr } INC ESI { Point ESI to 2'nd char of substr } SUB ECX,EDX { #positions in s to look at } JLE @@fail @@loop: REPNE SCASB JNE @@fail MOV EBX,ECX { save outer loop counter } PUSH ESI { save outer loop substr pointer } PUSH EDI { save outer loop s pointer } MOV ECX,EDX REPE CMPSB POP EDI { restore outer loop s pointer } POP ESI { restore outer loop substr pointer } JE @@found MOV ECX,EBX { restore outer loop counter } JMP @@loop @@fail: POP EDX { get rid of saved s pointer } XOR EAX,EAX JMP @@exit @@stringEmpty: XOR EAX,EAX JMP @@noWork @@badindex: XOR EAX,EAX JMP @@exit @@found: POP EDX { restore pointer to first char of s } MOV EAX,EDI { EDI points of char after match } SUB EAX,EDX { the difference is the correct index } @@exit: POP EDI POP ESI POP EBX @@noWork: end; //changes upper-ascii chars to html escape codes in a string function Webble(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 to Length(InString) do begin if Ord(InString[i]) > 127 then OutString := OutString + LoadStr(Ord(InString[i])) else OutString := OutString + InString[i]; end; Result := OutString; end; //changes upper-ascii chars to unicode numeric html escape codes in a string function Webble2(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; if Length(InString) < 1 then Exit; for i := 1 to Length(InString) do begin if Ord(InString[i]) > 159 then OutString := OutString + '&#' + IntToStr(Ord(InString[i])) + ';' else if Ord(InString[i]) > 127 then OutString := OutString + '&#' + IntToStr(GetDecUnicodeNumFromWinNum(Ord(InString[i]))) + ';' else OutString := OutString + InString[i]; end; Result := OutString; end; function GetDecUnicodeNumFromWinNum(WinNum: integer): integer; begin Case WinNum of 128: Result := 8364; 130: Result := 8218; 131: Result := 402; 132: Result := 8222; 133: Result := 8230; 134: Result := 8224; 135: Result := 8225; 136: Result := 710; 137: Result := 8240; 138: Result := 352; 139: Result := 8249; 140: Result := 338; 142: Result := 381; 145: Result := 8216; 146: Result := 8217; 147: Result := 8220; 148: Result := 8221; 149: Result := 8226; 150: Result := 8211; 151: Result := 8212; 152: Result := 732; 153: Result := 8482; 154: Result := 353; 155: Result := 8250; 156: Result := 339; 158: Result := 382; 159: Result := 376; else Result := WinNum; end; end; //Converts HTML escape codes back to winchars function UnWebble(InString: string): string; var i: integer; ECode: string; begin for i := 128 to 255 do begin ECode := LoadStr(i); if Length(ECode) > 0 then InString := ReplaceStuff(ECode, Chr(i), InString); end; Result := InString; end; //changes upper-ascii Mac chars to PC equivalents in a string function MacCharToPC(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 to Length(InString) do begin if Ord(InString[i]) > 127 then OutString := OutString + LoadStr(Ord(InString[i]) + 1000) else OutString := OutString + InString[i]; end; Result := OutString; end; //changes upper-ascii PC chars to Mac equivalents in a string function PCCharToMac(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 to Length(InString) do begin if Ord(InString[i]) > 127 then OutString := OutString + LoadStr(Ord(InString[i]) + 2000) else OutString := OutString + InString[i]; end; Result := OutString; end; //changes upper-ascii PC (ISO) char numbers to '%' + hexadecimal numbers (for JavaScript escape codes) in a string function PCCharToHex(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 to Length(InString) do begin if Ord(InString[i]) > 127 then OutString := OutString + LoadStr(Ord(InString[i]) + 3000) else begin if Ord(InString[i]) = 37 then OutString := OutString + '%25' else OutString := OutString + InString[i]; end; end; Result := OutString; end; //Changes hexed numbers to PCChars function HexToPCChar(InString: string): string; var i: integer; ECode: string; begin for i := 128 to 255 do begin ECode := LoadStr(3000+i); if Length(ECode) > 0 then InString := ReplaceStuff(ECode, Chr(i), InString); end; Result := InString; end; //changes upper-ascii PC (ISO) char numbers to '%' + hexadecimal //numbers of Mac equivalents (for JavaScript escape codes) in a string function PCCharToMacHex(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 to Length(InString) do begin if Ord(InString[i]) > 127 then OutString := OutString + LoadStr(Ord(InString[i]) + 4000) else begin if Ord(InString[i]) = 37 then OutString := OutString + '%25' else OutString := OutString + InString[i]; end; end; Result := OutString; end; function PCCharToUnderscoreHex(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 to Length(InString) do begin if Ord(InString[i]) > 127 then OutString := OutString + '_' + UpperCase(IntToHex(Ord(InString[i]), 2)) else OutString := OutString + InString[i]; end; Result := OutString; end; function AllCharsToHTMLHex(InString: string): string; var i: LongInt; OutString: string; CharNum: integer; begin OutString := ''; for i := 1 to Length(InString) do begin CharNum := Ord(InString[i]); if (CharNum < 160) and (CharNum > 127) then CharNum := GetDecUnicodeNumFromWinNum(CharNum); OutString := OutString + '&#x' + UpperCase(IntToHex(CharNum, 4)) + ';'; end; Result := OutString; end; function MakeEscapeLookup(InString: string): string; var i: integer; CharArray: string; EscapeArray: string; UsedList: string; Counter: integer; begin Result := ''; CharArray := 'CharList = new Array();' + #13#10; EscapeArray := 'EscapeList = new Array();' +#13#10; if Length(InString) < 1 then begin Result := CharArray + EscapeArray; Exit; end; Counter := 0; for i := 1 to Length(InString) do begin if (Ord(InString[i]) > 127) and (Pos(InString[i], UsedList) < 1) then begin UsedList := UsedList + InString[i]; CharArray := CharArray + 'CharList[' + IntToStr(Counter) + ']=''' + LoadStr(Ord(InString[i]) + 3000) + ''';' + #13#10; EscapeArray := EscapeArray + 'EscapeList[' + IntToStr(Counter) + ']=''' + LoadStr(Ord(InString[i])) + ''';' + #13#10; inc(Counter); if AnsiUpperCase(InString[i]) <> InString[i] then begin CharArray := CharArray + 'CharList[' + IntToStr(Counter) + ']=''' + LoadStr(Ord(AnsiUpperCase(InString)[i]) + 3000) + ''';' + #13#10; EscapeArray := EscapeArray + 'EscapeList[' + IntToStr(Counter) + ']=''' + LoadStr(Ord(AnsiUpperCase(InString)[i])) + ''';' + #13#10; inc(Counter); end; if AnsiLowerCase(InString[i]) <> InString[i] then begin CharArray := CharArray + 'CharList[' + IntToStr(Counter) + ']=''' + LoadStr(Ord(AnsiLowerCase(InString)[i]) + 3000) + ''';' + #13#10; EscapeArray := EscapeArray + 'EscapeList[' + IntToStr(Counter) + ']=''' + LoadStr(Ord(AnsiLowerCase(InString)[i])) + ''';' + #13#10; inc(Counter); end; end; end; if Counter > 0 then begin CharArray := CharArray + #13#10 + 'for (var i=0; i 0 do begin Position := Pos(#9, InString); InString := Copy(InString, 0, Position - 1) + ' ' + Copy(InString, Position + 1, Length(InString) - Position); end; while Pos(' ', InString) > 0 do begin Position := Pos(' ', InString); InString := Copy(InString, 0, Position - 1) + Copy(InString, Position + 1, Length(InString) - Position); end; while (InString[1] = ' ') or (InString[1] = #13) or (InString[1] = #10) do InString := Copy(InString, 2, Length(InString) - 1); while (InString[Length(InString)] = ' ') or (InString[Length(InString)] = #13) or (InString[Length(InString)] = #10) do InString := Copy(InString, 1, Length(InString) - 1); Result := InString; end; function RemoveReturns(InString: string): string; var Position: LongInt; begin while Pos(#13, InString) > 0 do begin Position := Pos(#13, InString); InString := Copy(InString, 0, Position - 1) + Copy(InString, Position + 1, Length(InString) - Position); end; while Pos(#10, InString) > 0 do begin Position := Pos(#10, InString); InString := Copy(InString, 0, Position - 1) + Copy(InString, Position + 1, Length(InString) - Position); end; Result := InString; end; //Adds HTML
tags for each return and 5 X   for tabs function HTMLParas(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 to Length(InString) do begin if InString[i] = #13 then OutString := OutString + '
' + InString[i] else begin if InString[i] = #9 then OutString := OutString + '     ' else OutString := OutString + InString[i]; end; end; Result := OutString; end; //Replaces all returns with
tags, and removes unwanted //BRs in table code function ReturnsToBR(InString: string): string; begin //Line added for 5.5.0.12 -- indentation in table code produced by other authoring //tools was causing problems Result := RemoveDoubleSpaces(InString); Result := ReplaceStuff(#13#10, '
', Result); //New section -- doesn't screw up table code Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); //New lines for table head for HotPot 6 and Quandary 2.1 Result := ReplaceStuff('
', '', Result); Result := ReplaceStuff('
', '', Result); //Another new section -- don't screw up lists! Result := ReplaceStuff('

', '', Result); Result := ReplaceStuff('
', '', Result); end; //Removes all punctuation except single quotes function StripPunctuation(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 To Length(InString) do begin if not (InString[i] in PuncMarks) then OutString := OutString + InString[i]; end; Result := OutString; end; //changes returns to Char #26s in a string (for saving string lists) function HideReturns(InString: string): string; var i: LongInt; begin if InString <> '' then while Pos(#13#10, InString) > 0 do begin i := Pos(#13#10, InString); InString := Copy(InString, 0, i-1) + #26 + Copy(InString, i+2, (Length(InString) - (i+1))); end; Result := InString; end; //changes Char #26s to returns in a string (for loading string lists) function RestoreReturns(InString: string): string; var i: LongInt; begin if InString <> '' then while Pos(#26, InString) > 0 do begin i := Pos(#26, InString); InString := Copy(InString, 0, i-1) + #13#10 + Copy(InString, i+1, (Length(InString) - (i))); end; Result := InString; end; //replaces a substring in a string with another string function ReplaceStuff(Token, Replacement, InString: string): string; var i: LongInt; SoFar: LongInt; begin if InString <> '' then begin SoFar := 0; while Pos(Token, InString) > SoFar do begin i := Pos(Token, InString); SoFar := (i-1) + Length(Replacement); InString := Copy(InString, 0, i-1) + Replacement + Copy(InString, i+Length(Token), (Length(InString) - (i+Length(Token)-1))); end; end; Result := InString; end; function WReplaceStuff(Token, Replacement, InString: WideString): WideString; var i: integer; SoFar: integer; begin if InString <> '' then begin SoFar := 0; while Pos(Token, InString) > SoFar do begin i := Pos(Token, InString); SoFar := (i-1) + Length(Replacement); InString := Copy(InString, 0, i-1) + Replacement + Copy(InString, i+Length(Token), (Length(InString) - (i+Length(Token)-1))); end; end; Result := InString; end; //Places a backslash before each single quote function EscapeSingleQuotes(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 To Length(InString) do begin if (InString[i] = '''') or (InString[i] = '\') then OutString := OutString + '\'; OutString := OutString + InString[i]; end; Result := OutString; end; //Places a backslash before each double quote function EscapeDoubleQuotes(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 To Length(InString) do begin if InString[i] = '"' then OutString := OutString + '\'; OutString := OutString + InString[i]; end; Result := OutString; end; //Places a backslash before each single or double quote function EscapeAllQuotes(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 To Length(InString) do begin if (InString[i] = '"') or (InString[i] = '''') then OutString := OutString + '\'; OutString := OutString + InString[i]; end; Result := OutString; end; function QuotesToPercentEscapes(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 To Length(InString) do begin if (InString[i] = '"') then OutString := OutString + '%22' else if (InString[i] = '''') then OutString := OutString + '%27' else OutString := OutString + InString[i]; end; Result := OutString; end; //Replaces all double quotes with " -- used for button captions function DQuotesToEntity(InString: string): string; var i: LongInt; OutString: string; begin OutString := ''; for i := 1 To Length(InString) do begin if (InString[i] = '"') then OutString := OutString + '"' else OutString := OutString + InString[i]; end; Result := OutString; end; //Changes angle brackets to HTML escapes function EscapeAngleBrackets(InString: string): string; begin InString := ReplaceStuff('<', '<', InString); InString := ReplaceStuff('>', '>', InString); Result := InString; end; function MakeNonBreaking(InString: string): string; begin InString := ReplaceStuff(' ', ' ', InString); InString := ReplaceStuff(#9, ' ', InString); InString := ReplaceStuff(#13#10, ' ', InString); InString := ReplaceStuff(#13, ' ', InString); InString := ReplaceStuff(#10, ' ', InString); InString := Trim(InString); Result := InString; end; //finds the position of the last instance of an open body tag, if there is one, //in a Web page; returns the position of the first char after the end of the tag. function GetLastHeadCloseTagLocation(InString: string): integer; var i, j: integer; begin //Initialize default Result := -1; InString := ANSIUpperCase(InString); i := 0; j := Pos('', InString); while j > 0 do begin i := j; j := PosEx('', InString, j+1); end; Result := i; end; function GetLastBodyOpenTagLocation(InString: string): integer; var i, j: integer; Index1: integer; Index2: integer; begin //Initialize default Result := -1; InString := ANSIUpperCase(InString); i := 0; j := Pos(' 0 do begin i := j; j := PosEx('', InString); while j > 0 do begin i := j; j := PosEx('', InString, j+1); end; Index2 := i; if Index1 > Index2 then i := Index1 else i := Index2; if i > 0 then begin while (InString[i] <> '>') and (i < Length(InString)) do inc(i); inc(i); end; Result := i; end; //finds the position of the last instance of a close body tag, if there is one, //in a Web page; returns the position of the first char in the tag. function GetLastBodyCloseTagLocation(InString: string): integer; var i, j: integer; begin //Initialize default Result := -1; InString := ANSIUpperCase(InString); i := 0; j := Pos('', InString); while j > 0 do begin i := j; j := PosEx('', InString, j+1); end; if i > 0 then Result := i; end; function IsGoodEMailAddress(InString: string): Boolean; var i: integer; GoodChars: set of Char; begin Result := True; if Length(InString) < 5 then begin Result := False; Exit; end; GoodChars := ['A'..'Z', 'a'..'z', '0'..'9','_','-','.','@']; for i := 1 to Length(InString) do if not (InString[i] in GoodChars) then Result := False; end; //Replaces double spaces with single spaces until there are no more doubles function RemoveDoubleSpaces(InString: string): string; var Found: LongInt; begin while Pos(' ', InString) > 0 do begin Found := Pos(' ', InString); InString := Copy(InString, 1, Found) + Copy(InString, Found + 2, Length(InString) - (Found + 1)); end; Result := InString; end; function WRemoveDoubleSpaces(InString: WideString): WideString; var Found: integer; begin while Pos(' ', InString) > 0 do begin Found := Pos(' ', InString); InString := Copy(InString, 1, Found) + Copy(InString, Found + 2, Length(InString) - (Found + 1)); end; Result := InString; end; function WRemoveReturns(InString: WideString): WideString; begin InString := WideStringReplace(InString, #13#10, '', [rfReplaceAll]); InString := WideStringReplace(InString, #13, '', [rfReplaceAll]); InString := WideStringReplace(InString, #10, '', [rfReplaceAll]); Result := InString; end; procedure SplitWords(InString: string; var OutList: TStringList); var BeginWord: LongInt; i: LongInt; InWord: Boolean; begin //Clear the list OutList.Clear; //iterate through the string BeginWord := 1; InWord := False; for i := 1 to Length(InString) do begin if (InString[i] in PuncMarks) or (InString[i] = ' ') then begin //Ignore apostrophes inside words if not ((InString[i] = '''') and not (InString[i-1] in PuncMarks) and not (InString[i+1] in PuncMarks)) then begin if InWord then OutList.Append(Copy(InString, BeginWord, i-BeginWord)); if (InString[i] <> ' ') then begin if InString[i+1] = ' ' then OutList.Append(InString[i] + ' ') else if InString[i-1] = ' ' then OutList.Append(' ' + InString[i]) else OutList.Append(InString[i]); end; BeginWord := i + 1; InWord := False; end; end else begin InWord := True; if i = Length(InString) then OutList.Append(Copy(InString, BeginWord, (i-BeginWord) + 1)); end; end; end; procedure WideSplitWords(InString: WideString; var OutList: TWideStringList); var BeginWord: integer; i: integer; InWord: Boolean; begin //Clear the list OutList.Clear; //Bail if nothing useful coming in if Length(InString) < 1 then Exit; //Add initial and final space to avoid range check error! if InString[Length(InString)] <> ' ' then InString := InString + WideString(' '); if InString[1] <> ' ' then InString := WideString(' ') + InString; //iterate through the string BeginWord := 1; InWord := False; for i := 1 to Length(InString) do begin if (UnicodeIsWhitespace(Cardinal(InString[i]))) or (UnicodeIsPunctuation(Cardinal(InString[i]))) then begin //Ignore apostrophes inside words if not ((InString[i] = '''') and not (UnicodeIsPunctuation(Cardinal(InString[i-1]))) and not (UnicodeIsPunctuation(Cardinal(InString[i+1])))) then begin if InWord then OutList.Append(Copy(InString, BeginWord, i-BeginWord)); if (InString[i] <> ' ') then begin if InString[i+1] = ' ' then OutList.Append(InString[i] + WideString(' ')) else if InString[i-1] = ' ' then OutList.Append(WideString(' ') + InString[i]) else OutList.Append(InString[i]); end; BeginWord := i + 1; InWord := False; end; end else begin InWord := True; if i = Length(InString) then OutList.Append(Copy(InString, BeginWord, (i-BeginWord) + 1)); end; end; end; //Gets a piece of text between two markers and returns it function GetTextBetween(InString, StartMarker, EndMarker: string): string; var i: LongInt; j: LongInt; OutString: string; begin OutString := ''; //Make sure both markers exist i := Pos(StartMarker, InString); j := PosEx(EndMarker, InString, i+Length(StartMarker)); if (i < 1) or (j < 1) then begin Result := OutString; Exit; end; i := i + Length(StartMarker); OutString := Copy(InString, i, j - i); Result := OutString; end; function WGetTextBetween(InString, StartMarker, EndMarker: WideString): WideString; var StartPos, EndPos: integer; wsTemp: WideString; begin Result := ''; //default; StartPos := Pos(StartMarker, InString); if StartPos > 0 then begin wsTemp := Copy(InString, StartPos + Length(StartMarker), Length(InString) - (StartPos + Length(StartMarker) - 1)); EndPos := Pos(EndMarker, wsTemp); if EndPos > 0 then Result := Copy(wsTemp, 1, EndPos-1); end; end; //Gets a piece of text between two markers and returns it with or without markers; //can also replace the string in the original text function GetTextBetweenExtra(var OriginalString: string; StartMarker, EndMarker, Replacement: string; IncludeMarkers, Replace: Boolean): string; var i: LongInt; j: LongInt; OutString: string; begin OutString := ''; //Make sure both markers exist i := Pos(StartMarker, OriginalString); j := PosEx(EndMarker, OriginalString, i); if (i < 1) or (j < 1) then begin Result := OutString; Exit; end; J := j + Length(EndMarker); OutString := Copy(OriginalString, i, j - i); if Replace then OriginalString := ReplaceStuff(OutString, Replacement, OriginalString); if not IncludeMarkers then begin OutString := ReplaceStuff(StartMarker, '', OutString); OutString := ReplaceStuff(EndMarker, '', OutString); end; Result := OutString; end; //Replaces text between two markers, including the markers; //does only the first instance function ReplaceTextFromTo(InString: string; StartMarker, EndMarker, Replacement: string): string; var i: LongInt; j: LongInt; OutString: string; begin OutString := ''; //Make sure both markers exist i := Pos(StartMarker, InString); j := PosEx(EndMarker, InString, i); if (i < 1) or (j < 1) then begin Result := OutString; Exit; end; j := j + Length(EndMarker); OutString := Copy(InString, 1, i-1) + Replacement + Copy(InString, j, Length(InString) - (j-1)); Result := OutString; end; function RemoveFileExtension(InString: string): string; //Takes the dot and extension off the file path/name and returns it. //Returns the whole thing if there's no dot found. var i: integer; begin i := Length(InString); while (InString[i] <> '.') and (i > 0) do Dec(i); if i > 0 then Result := Copy(InString, 1, i-1) else Result := InString; end; //Splits a string into sections, and fills the //string list with those sections function SplitString(CharsPerString: integer; InString: string; var OutList: TStringList): Boolean; var BreakPoint: integer; begin //Set the result Result := False; //clear the string list OutList.Clear; //Add all but the final bit while Length(InString) > CharsPerString do begin BreakPoint := CharsPerString; while (InString[BreakPoint] = '\') and (BreakPoint < Length(InString)) do inc(BreakPoint); OutList.Add(Copy(InString, 1, BreakPoint)); InString := Copy(InString, BreakPoint + 1, Length(InString) - BreakPoint); end; //Add the final bit OutList.Add(InString); Result := True; end; //Splits a string into items in a string list based on the line end token function SplitStringToLines(LineEndToken: string; InString: string; var OutList: TStringList): Boolean; var TokenPosition: integer; StartPoint: integer; begin //Clear the string list OutList.Clear; //Check you have a valid token if Length(LineEndToken) < 1 then begin OutList.Add(InString); Result := False; Exit; end; //Work through the string TokenPosition := Pos(LineEndToken, InString); while TokenPosition > 0 do begin //Get the string OutList.Add(Copy(InString, 1, TokenPosition - 1)); //Remove it from InString StartPoint := TokenPosition + Length(LineEndToken); InString := Copy(InString, StartPoint, Length(InString) - (StartPoint - 1)); TokenPosition := Pos(LineEndToken, InString); end; //Add the last bit OutList.Add(InString); Result := True; end; //Splits a string into items in a string list based on the line end token; //Includes the token function SplitStringToLinesEx(LineEndToken: string; InString: string; var OutList: TStringList): Boolean; var TokenPosition: integer; StartPoint: integer; begin //Clear the string list OutList.Clear; //Check you have a valid token if Length(LineEndToken) < 1 then begin OutList.Add(InString); Result := False; Exit; end; //Work through the string TokenPosition := Pos(LineEndToken, InString); while TokenPosition > 0 do begin //Get the string OutList.Add(Copy(InString, 1, TokenPosition - 1) + LineEndToken); //Remove it from InString StartPoint := TokenPosition + Length(LineEndToken); InString := Copy(InString, StartPoint, Length(InString) - (StartPoint - 1)); TokenPosition := Pos(LineEndToken, InString); end; //Add the last bit if there's anything left if Length(InString) > 0 then OutList.Add(InString); Result := True; end; //Assigns a string to a JavaScript identifier. function MakeJavaScriptString(StringName, InString: string): string; begin Result := StringName + '=''' + HDecimalUCodeToJHexUCode(EscapeSingleQuotes(InString)) + ''';'; end; function MakeHexJavaScriptString(StringName, InString: string): string; begin Result := MakeJavaScriptString(StringName, PCCharToHex(InString)); end; function MakeJavaScriptArray(ArrayName: string; ArrayItems: TStringList; StartFrom: integer): string; var i: integer; OutString: string; ItemName: string; begin Outstring := ''; if ArrayItems.Count > 0 then begin for i := 0 to ArrayItems.Count - 1 do begin ItemName := ArrayName + '[' + IntToStr(StartFrom + i) + ']'; OutString := OutString + MakeJavaScriptString(ItemName, ArrayItems[i]) + #13#10; end; end; Result := OutString; end; function StripChars(CharsToStrip: string; InString: string): string; //this removes each of the chars in CharsToStrip from the string var FoundPos: integer; i: integer; CharString: string; begin Result := InString; for i := 1 to Length(CharsToStrip) do begin CharString := Copy(CharsToStrip, i, 1); FoundPos := Pos(CharString, InString); while FoundPos > 0 do begin Delete(InString, FoundPos, 1); FoundPos := Pos(CharsToStrip[i], InString); end; end; Result := InString; end; function IncludeStuff(InString, Tag: string; IncludeIt: Boolean): string; //This deletes an element bracketed by tags, or if IncludeIt is True, //it deletes the tags only. It operates on all instances of the tag in //InString. var OutString: string; OpenTag: string; CloseTag: string; StartPos: LongInt; EndPos: LongInt; begin OpenTag := '[' + Tag + ']'; CloseTag := '[/' + Tag + ']'; OutString := InString; if IncludeIt = True then begin OutString := ReplaceStuff(OpenTag, '', OutString); Result := ReplaceStuff(CloseTag, '', OutString); Exit; end else begin while Pos(OpenTag, OutString) > 0 do begin StartPos := Pos(OpenTag, OutString); EndPos := PosEx(CloseTag, OutString, StartPos); if EndPos > 0 then begin EndPos := EndPos + Length(CloseTag); OutString := Copy(OutString, 1, StartPos-1) + Copy(OutString, EndPos, Length(OutString) - (EndPos-1)); end else begin //No end tag -- delete the open tag only OutString := Copy(OutString, 1, StartPos-1) + Copy(OutString, StartPos + Length(OpenTag), Length(OutString) - ((StartPos + Length(OpenTag))-1)); end; end; Result := OutString; end; end; function InsertMetaTag(WebPage, ProgName, UserName: string): string; //This inserts a meta tag into the page identifying the app //and user who created it. var MetaTag: string; HeadPos: LongInt; begin //Find the position to insert at HeadPos := Pos('', UpperCase(WebPage)); //Insert the string if HeadPos > 0 then begin MetaTag := '' else MetaTag := MetaTag + 'UNREGISTERED.">'; MetaTag := MetaTag + ''; Insert(MetaTag, WebPage, HeadPos + 6); end; Result := WebPage; end; function RemoveJSComments(InString: string): string; var StringList: TStringList; i: integer; InJavaScript: Boolean; begin StringList := TStringList.Create; //Set the result to the input in case of cockups Result := InString; try StringList.Text := InString; if StringList.Count < 1 then Exit; InJavaScript := False; i := StringList.Count - 1; while (i > 0) do begin if InJavaScript then begin if Copy(StringList[i], 1, 2) = '//' then if Pos(Copy(StringList[i], 1, 4), '//' then InJavaScript := True; dec(i); end; Result := StringList.Text; finally StringList.Free; end; end; function RemoveJSComments2(InString: string): string; //This function assumes that we're only processing a block of pure JavaScript, and //doesn't allow for any surrounding HTML tags var SL: TStringList; i: integer; j: integer; CommentPos: integer; QuoteCount: integer; begin //Set the result to the input in case of cockups Result := InString; //Remove block-level comments in one go GetTextBetweenExtra(InString, '/*', '*/', '', True, True); SL := TStringList.Create; try SL.Text := InString; if SL.Count < 1 then Exit; i := SL.Count - 1; while (i > 0) do begin CommentPos := Pos('//', SL[i]); if (CommentPos > 0) then begin if (CommentPos = 1) then SL[i] := Copy(SL[i], 1, CommentPos - 1) else //Check it's not part of a string literal -- this only takes account of //strings with single quotes because we're working on our own files begin QuoteCount := 0; for j := Length(SL[i]) downto CommentPos do if (SL[i][j] = '''') then inc(QuoteCount); if QuoteCount mod 2 = 0 then //it shouldn't be inside a string literal SL[i] := Copy(SL[i], 1, CommentPos - 1); end; end; if Length(SL[i]) < 1 then SL.Delete(i); dec(i); end; Result := SL.Text; finally SL.Free; end; end; function SeparateJavaScript(var InString: string; HTMLFileName: string): Boolean; var JSCode: TStringList; JSFileName: string; JSLinkTag: string; StartOpenTag, EndOpenTag, StartCloseTag, EndCloseTag: integer; begin //Default return Result := False; //Is there any JavaScript? StartOpenTag := Pos(''; //Find each piece of script and extract it while StartOpenTag > 0 do begin //Find the relevant starts and ends for the script and its tags EndOpenTag := StartOpenTag; while (InString[EndOpenTag] <> '>') and (EndOpenTag < Length(InString)) do inc(EndOpenTag); StartCloseTag := PosEx('', InString, EndOpenTag); if StartCloseTag < 1 then break; EndCloseTag := StartCloseTag + 9; //Grab the script JSCode.Add(Copy(InString, EndOpenTag+1, (StartCloseTag-(EndOpenTag+1)))); //Take the whole script tag out of the page InString := Copy(InString, 1, StartOpenTag-1) + Copy(InString, EndCloseTag+1, Length(InString)-EndCloseTag); //Get the next bit of script StartOpenTag := Pos(' 0 then begin JSCode.SaveToFile(JSFileName); //Insert the link in the file Insert(JSLinkTag, InString, Pos('',InString)); Result := True; end; finally JSCode.Free; end; end; function SeparateFirstJavaScript(var InString: string; HTMLFileName: string): Boolean; var JSCode: TStringList; JSFileName: string; JSLinkTag: string; StartOpenTag, EndOpenTag, StartCloseTag, EndCloseTag: integer; begin //Default return Result := False; //Is there any JavaScript? StartOpenTag := Pos(''; //Find each piece of script and extract it if StartOpenTag > 0 then begin //Find the relevant starts and ends for the script and its tags EndOpenTag := StartOpenTag; while (InString[EndOpenTag] <> '>') and (EndOpenTag < Length(InString)) do inc(EndOpenTag); StartCloseTag := PosEx('', InString, EndOpenTag); //If no close tag, then it's ill-formed and we should leave if StartCloseTag < 1 then begin Exit; end; EndCloseTag := StartCloseTag + 9; //Grab the script JSCode.Add(Copy(InString, EndOpenTag+1, (StartCloseTag-(EndOpenTag+1)))); //Take the whole script tag out of the page InString := Copy(InString, 1, StartOpenTag-1) + Copy(InString, EndCloseTag+1, Length(InString)-EndCloseTag); end; //If anything was found, save it if JSCode.Count > 0 then begin JSCode.SaveToFile(JSFileName); //Insert the link in the file Insert(JSLinkTag, InString, Pos('',InString)); Result := True; end; finally JSCode.Free; end; end; //Removes iffy chars and spaces from file names function CleanupFileName(InName: string): string; const BadChars: set of Char = ['`','''','"',' ','(',')',':',';','{','}','[',']',',','+']; var i: integer; begin Result := ''; for i := 1 to Length(InName) do if not (InName[i] in BadChars) then Result := Result + InName[i]; end; function TextToAudioFileName(Text: WideString; Extension: WideString): WideString; var i: integer; begin Result := WideString(''); if Length(Text) < 1 then Exit; for i := 1 to Length(Text) do begin if Ord(Text[i]) in [48..57,97..122] then Result := Result + Text[i] else if Ord(Text[i]) in [65..90] then Result := Result + WideLowerCase(Text[i]) else if Text[i] = WideChar(' ') then Result := Result + WideChar('_') else if Text[i] = WideChar('''') then Result := Result + WideChar('7') else Result := Result + WideChar('-'); end; if Length(Extension) > 0 then begin if Extension[1] <> WideChar('.') then Extension := WideChar('.') + Extension; Result := Result + Extension; end; end; function AudioFileNameToText(AudioFileName: WideString; Extension: WideString): WideString; var i: integer; begin Result := WideString(''); if Length(AudioFileName) < 1 then Exit; if Length(Extension) > 0 then begin if Pos(Extension, AudioFileName) = (Length(AudioFileName) - Length(Extension))+1 then AudioFileName := Copy(AudioFileName, 1, Length(AudioFileName)-Length(Extension)); end; for i := 1 to Length(AudioFileName) do begin if Ord(AudioFileName[i]) in [48..54,56..57,65..90,97..122] then Result := Result + AudioFileName[i] else if AudioFileName[i] = WideChar('_') then Result := Result + WideChar(' ') else if AudioFileName[i] = WideChar('7') then Result := Result + WideChar('''') else Result := Result + WideChar('?'); end; end; function SLIndexOf(SList: TStringList; S: string; CaseSensitive: Boolean): integer; var i: integer; begin if CaseSensitive = False then begin Result := SList.IndexOf(S); Exit; end; Result := -1; //default return if SList.Count < 1 then Exit; for i := 0 to SList.Count-1 do if SList[i] = S then Result := i; end; //WideString functions function WPosEx(const SubStr: WideString; const S: WideString; StartIndex: integer): integer; var Temp: WideString; begin //Make a copy of the string starting from StartIndex Temp := Copy(S, StartIndex, Length(S) - (StartIndex-1)); Result := Pos(SubStr, Temp); if Result > 0 then Result := Result + (StartIndex-1); end; function WidePosEx(const SubStr, S: WideString; Offset: Integer = 1): Integer; //Adapted Delphi code var I,X: Integer; Len, LenSubStr: Integer; begin if Offset = 1 then Result := Pos(SubStr, S) else begin if Offset < 0 then begin Result := 0; exit; end; I := Offset; LenSubStr := Length(SubStr); Len := Length(S) - LenSubStr + 1; while I <= Len do begin if S[I] = SubStr[1] then begin X := 1; while (X < LenSubStr) and (S[I + X] = SubStr[X + 1]) do Inc(X); if (X = LenSubStr) then begin Result := I; exit; end; end; Inc(I); end; Result := 0; end; end; function WideStringToANSI(WS: WideString): string; var l: integer; begin if WS = '' then Result := '' else begin l := WideCharToMultiByte(CP_ACP, 0, @WS[1], -1, nil, 0, nil, nil); SetLength(Result, l-1); if l > 1 then WideCharToMultiByte(CP_ACP, 0, @WS[1], -1, @Result[1], l-1, nil, nil); end; end; //Places a backslash before each single quote function WEscapeSingleQuotes(InString: WideString): WideString; var i: LongInt; OutString: WideString; begin OutString := ''; for i := 1 To Length(InString) do begin if (InString[i] = '''') or (InString[i] = '\') then OutString := OutString + '\'; OutString := OutString + InString[i]; end; Result := OutString; end; //Places a backslash before each double quote function WEscapeDoubleQuotes(InString: WideString): WideString; var i: LongInt; OutString: WideString; begin OutString := ''; for i := 1 To Length(InString) do begin Application.ProcessMessages; if InString[i] = '"' then OutString := OutString + '\'; OutString := OutString + InString[i]; end; Result := OutString; end; function WEscapeAllQuotes(InString: WideString): WideString; var i: LongInt; OutString: WideString; begin OutString := ''; for i := 1 To Length(InString) do begin Application.ProcessMessages; if InString[i] = WideChar('"') then OutString := OutString + '\u0022' else if InString[i] = WideChar('''') then OutString := OutString + '\u0027' else OutString := OutString + InString[i]; end; Result := OutString; end; function WMakeCompliantXMLAttribute(InString: WideString; QuoteChar: WideChar): WideString; begin Result := WideTrim(InString); //Do ampersands first, so other entities aren't double-escaped Result := StringReplace(Result, '&', '&', [rfReplaceAll]); if QuoteChar = '"' then Result := StringReplace(Result, '"', '"', [rfReplaceAll]); if QuoteChar = '''' then Result := StringReplace(Result, '''', ''', [rfReplaceAll]); Result := StringReplace(Result, #13, ' ', [rfReplaceAll]); Result := StringReplace(Result, #10, '', [rfReplaceAll]); Result := StringReplace(Result, '<', '<', [rfReplaceAll]); Result := StringReplace(Result, '>', '>', [rfReplaceAll]); end; function WUriAsCompliantXMLAttribute(InString: WideString; QuoteChar: WideChar): WideString; begin Result := WideTrim(InString); //Do ampersands first, so other entities aren't double-escaped. //First unescape any, so that we know where we stand. Result := StringReplace(Result, '&', '&', [rfReplaceAll]); //Now escape them all. Result := StringReplace(Result, '&', '&', [rfReplaceAll]); if QuoteChar = '"' then Result := StringReplace(Result, '"', '"', [rfReplaceAll]); if QuoteChar = '''' then Result := StringReplace(Result, '''', ''', [rfReplaceAll]); Result := StringReplace(Result, #13, ' ', [rfReplaceAll]); Result := StringReplace(Result, #10, '', [rfReplaceAll]); Result := StringReplace(Result, '<', '<', [rfReplaceAll]); Result := StringReplace(Result, '>', '>', [rfReplaceAll]); end; function WUnescapeXMLAttribute(InString: WideString): WideString; begin Result := WideTrim(InString); //Do ampersands first, so other entities aren't double-escaped. Result := StringReplace(Result, '&', '&', [rfReplaceAll]); Result := StringReplace(Result, '&', '&', [rfReplaceAll]); Result := StringReplace(Result, '"', '"', [rfReplaceAll]); Result := StringReplace(Result, ''', '''', [rfReplaceAll]); Result := StringReplace(Result, '<', '<', [rfReplaceAll]); Result := StringReplace(Result, '>', '>', [rfReplaceAll]); Result := StringReplace(Result, '<', '<', [rfReplaceAll]); Result := StringReplace(Result, '>', '>', [rfReplaceAll]); end; function WEscapeAngleBrackets(InString: Widestring): Widestring; var i: LongInt; OutString: WideString; begin OutString := ''; for i := 1 To Length(InString) do begin Application.ProcessMessages; if InString[i] = '<' then OutString := OutString + WideString('<') else if InString[i] = '>' then OutString := OutString + WideString('>') else OutString := OutString + InString[i]; end; Result := OutString; end; function WNormalizeReturns(InString: WideString): WideString; var i: integer; wsTemp: WideString; begin Result := InString; if Length(InString) < 1 then Exit; wsTemp := WideString('*&^%$#'); Result := StringReplace(Result, WideString(#10#10), wsTemp, [rfReplaceAll]); Result := StringReplace(Result, WideString(#13#10), wsTemp, [rfReplaceAll]); Result := StringReplace(Result, WideString(#10), wsTemp, [rfReplaceAll]); Result := StringReplace(Result, WideString(#13), wsTemp, [rfReplaceAll]); Result := StringReplace(Result, wsTemp, WideString(#13#10), [rfReplaceAll]); end; function WideStringToHTMLNumeric(InString: WideString): string; var i: integer; CharNum: integer; begin //initialize, otherwise repeated calls to the function will simply //append to the Result variable Result := ''; for i := 1 to Length(InString) do begin Application.ProcessMessages; CharNum := Ord(InString[i]); if (CharNum = 13) then Result := Result + #13#10 else if (CharNum < 128) and (CharNum <> 10) then Result := Result + Chr(CharNum) else if (Ord(InString[i])<>10) then Result := Result + '&#' + IntToStr(Ord(InString[i])) + ';'; end; end; function WideStringToHTMLNumericAbove255(InString: WideString): string; var i: integer; CharNum: integer; begin //initialize, otherwise repeated calls to the function will simply //append to the Result variable Result := ''; for i := 1 to Length(InString) do begin Application.ProcessMessages; CharNum := Ord(InString[i]); if (CharNum = 13) then Result := Result + #13#10 else if (CharNum < 256) and (CharNum <> 10) then Result := Result + Chr(CharNum) else if (Ord(InString[i])<>10) then Result := Result + '&#' + IntToStr(Ord(InString[i])) + ';'; end; end; function WideStringToHTMLHex(InString: WideString): string; var i: integer; CharNum: integer; begin //initialize, otherwise repeated calls to the function will simply //append to the Result variable Result := ''; for i := 1 to Length(InString) do begin Application.ProcessMessages; CharNum := Ord(InString[i]); if (CharNum = 13) then Result := Result + #13#10 else if (CharNum < 128) and (not (CharNum in [10,38,60,62])) then //Need to escape angle brackets and ampersands Result := Result + Chr(CharNum) else if (Ord(InString[i])<>10) then Result := Result + '&#x' + IntToHex(Ord(InString[i]), 4) + ';'; end; end; function WideStringToHTMLOutput(InString: WideString; ProcessRTL: Boolean): string; //This escapes angle brackets that aren't part of tags before conversion var i: integer; CharNum: integer; begin //initialize, otherwise repeated calls to the function will simply //append to the Result variable Result := ''; InString := WConvertUnmatchedAngleBrackets(InString); InString := WReturnsToBR(InString); if ProcessRTL then InString := ProcessForRTLBoundaries(InString, False); for i := 1 to Length(InString) do begin Application.ProcessMessages; CharNum := Ord(InString[i]); if (CharNum < 128) then Result := Result + Chr(CharNum) else if (Ord(InString[i])<>10) then Result := Result + '&#x' + IntToHex(Ord(InString[i]), 4) + ';'; end; end; function WideStringToHTMLOutputCentredRTL(InString: WideString; ProcessRTL: Boolean): string; //This escapes angle brackets that aren't part of tags before conversion var i: integer; CharNum: integer; begin //initialize, otherwise repeated calls to the function will simply //append to the Result variable Result := ''; InString := WConvertUnmatchedAngleBrackets(InString); InString := WReturnsToBR(InString); if ProcessRTL then InString := ProcessForRTLBoundaries(InString, True); for i := 1 to Length(InString) do begin Application.ProcessMessages; CharNum := Ord(InString[i]); if (CharNum < 128) then Result := Result + Chr(CharNum) else if (Ord(InString[i])<>10) then Result := Result + '&#x' + IntToHex(Ord(InString[i]), 4) + ';'; end; end; function WideStringToJSUnicode(InString: WideString): string; var i: integer; CharNum: integer; begin Result := ''; for i := 1 to Length(InString) do begin Application.ProcessMessages; CharNum := Ord(InString[i]); if (CharNum = 13) then Result := Result + ' ' else if ((CharNum = 92) or (CharNum = 39)) then //backslash or apostrophe need to be escaped Result := Result + '\' + Chr(CharNum) else if (CharNum < 128) and (CharNum <> 10) then Result := Result + Chr(CharNum) else if (Ord(InString[i])<>10) then Result := Result + '\u' + IntToHex(Ord(InString[i]), 4); end; end; function WideStringToWideJSUnicode(InString: WideString): WideString; var i: integer; CharNum: integer; begin Result := WideString(''); for i := 1 to Length(InString) do begin Application.ProcessMessages; CharNum := Ord(WideChar(InString[i])); if (CharNum = 13) then Result := Result + WideString(' ') else if ((CharNum = 92) or (CharNum = 39)) then //backslash or apostrophe need to be escaped Result := Result + WideString('\') + WideChar(CharNum) else if (CharNum <> 10) then Result := Result + WideChar(CharNum); end; end; function WideStringToExplanation(InString: WideString): string; var i: integer; CharNum: integer; DecNums, HexNums: string; begin DecNums := 'Dec: '; HexNums := 'Hex: '; if Length(InString) > 0 then begin for i := 1 to Length(InString) do begin Application.ProcessMessages; CharNum := Ord(WideChar(InString[i])); DecNums := DecNums + ' ' + IntToStr(CharNum) + ' '; HexNums := HexNums + ' ' + IntToHex(CharNum, 4) + ' '; end; Result := DecNums + ' :: ' + HexNums; end; end; function WReturnsToBR(InString: WideString): WideString; //var //i: integer; //CharNum: integer; begin Result := WRemoveDoubleSpaces(InString); Result := WideStringReplace(Result,#13#10, '
', [rfReplaceAll]); //Replace remaining #13s in case #10s are missing Result := WideStringReplace(Result,#13, '
', [rfReplaceAll]); //New section -- doesn't screw up table code Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); //Another new section -- don't screw up lists! Result := WideStringReplace(Result,'

', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); //Yet another section -- object tags! Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); Result := WideStringReplace(Result,'
', '', [rfReplaceAll]); { Result := WideString(''); for i := 1 to Length(InString) do begin CharNum := Ord(WideChar(InString[i])); if (CharNum = 13) then Result := Result + WideString('
') else if (CharNum <> 10) then Result := Result + WideChar(CharNum); end; } end; function WideStringToJSUnicodeEx(InString: WideString): string; var i: integer; CharNum: integer; begin Result := ''; for i := 1 to Length(InString) do begin CharNum := Ord(InString[i]); if (CharNum = 13) then Result := Result + ' ' else if (Ord(InString[i])<>10) then Result := Result + '\u' + IntToHex(Ord(InString[i]), 4); end; end; function WideStringToJSUnicodeExNoTags(InString: WideString): string; var i: integer; CharNum: integer; InTag: Boolean; begin Result := ''; InTag := False; for i := 1 to Length(InString) do begin CharNum := Ord(InString[i]); Case CharNum of 60: begin InTag := True; Result := Result + '<'; end; 62: begin InTag := False; Result := Result + '>'; end; 13: Result := Result + ' '; 10: else begin if InTag = False then Result := Result + '\u' + IntToHex(Ord(InString[i]), 4) else Result := Result + Char(Ord(InString[i])); end; end; //End case end; end; function WideStringFromHTMLNumeric(InString: string): WideString; var NumString: string; i: integer; j: integer; NewWChar: WideChar; CharNum: integer; begin Result := ''; if Length(InString) < 1 then Exit; i := 1; while i <= Length(InString) do begin if Copy(InString, i, 2) = '&#' then begin if Copy(InString, i, 3) = '&#x' then //It's an HTML hex begin NumString := ''; j := i+3; while (InString[j] <> ';') and (j<=Length(InString)) do begin NumString := NumString + InString[j]; inc(j); end; if InString[j] <> ';' then begin Result := Result + WideString(NumString); Exit; end; CharNum := StrToIntDef('$' + NumString, 63); if CharNum < $FFFF then begin NewWChar := WideChar(Word(CharNum)); Result := Result + NewWChar; end; i := j+1; end else //It's a decimal begin NumString := ''; j := i+2; while (InString[j] <> ';') and (j<=Length(InString)) do begin NumString := NumString + InString[j]; inc(j); end; if InString[j] <> ';' then begin Result := Result + WideString(NumString); Exit; end; CharNum := StrToInt(NumString); if CharNum < $FFFF then begin NewWChar := WideChar(Word(CharNum)); Result := Result + NewWChar; end; i := j+1; end; end else //it's a straight character begin Result := Result + WideChar(InString[i]); inc(i); end; end; end; function WideStringFromWSHTMLNumeric(InString: WideString): WideString; var NumString: WideString; i: integer; j: integer; NewWChar: WideChar; CharNum: integer; begin Result := ''; if Length(InString) < 1 then Exit; i := 1; while i <= Length(InString) do begin if Copy(InString, i, 2) = '&#' then begin if Copy(InString, i, 3) = '&#x' then //It's an HTML hex begin NumString := ''; j := i+3; while (InString[j] <> ';') and (j<=Length(InString)) do begin NumString := NumString + InString[j]; inc(j); end; if InString[j] <> ';' then begin Result := Result + WideString(NumString); Exit; end; CharNum := StrToIntDef('$' + NumString, 63); if CharNum < $FFFF then begin NewWChar := WideChar(Word(CharNum)); Result := Result + NewWChar; end; i := j+1; end else //It's a decimal begin NumString := ''; j := i+2; while (InString[j] <> ';') and (j<=Length(InString)) do begin NumString := NumString + InString[j]; inc(j); end; if InString[j] <> ';' then begin Result := Result + WideString(NumString); Exit; end; CharNum := StrToInt(NumString); if CharNum < $FFFF then begin NewWChar := WideChar(Word(CharNum)); Result := Result + NewWChar; end; i := j+1; end; end else //it's a straight character begin Result := Result + WideChar(InString[i]); inc(i); end; end; end; function HDecimalUCodeToJHexUCode(InString: string): string; //Converts &#____; decimal chars to \u____ JavaScript escapes var NumString: string; i: integer; j: integer; CharNum: integer; begin if (Pos('&#', InString) < 1) or (Length(InString) < 1) then begin Result := InString; Exit; end; i := 1; while i <= Length(InString) do begin if Copy(InString, i, 2) = '&#' then begin if Copy(InString, i, 3) = '&#x' then //It's an HTML hex begin NumString := ''; j := i+3; while (InString[j] <> ';') and (InString[j] in ['0'..'9']) and (j<=Length(InString)) do begin NumString := NumString + InString[j]; inc(j); end; if InString[j] <> ';' then begin Result := Result + NumString; Exit; end; CharNum := StrToIntDef('$' + NumString, 63); if CharNum < $FFFF then Result := Result + '\u' + NumString; i := j+1; end else //It's a decimal begin NumString := ''; j := i+2; while (InString[j] <> ';') and (InString[j] in ['0'..'9']) and (j<=Length(InString)) do begin NumString := NumString + InString[j]; inc(j); end; if InString[j] <> ';' then begin Result := Result + NumString; Exit; end; CharNum := StrToInt(NumString); if CharNum < $FFFF then begin NumString := IntToHex(CharNum, 4); Result := Result + '\u' + NumString; end; i := j+1; end; end else begin Result := Result + InString[i]; inc(i); end; end; end; function JHexUCodeToHDecimalUCode(InString: string): string; //Converts \u____ JavaScript escapes to &#____; decimal chars var NumString: string; i: integer; CharNum: integer; begin if (Pos('\u', InString) < 1) or (Length(InString) < 1) then begin Result := InString; Exit; end; i := 1; while i <= Length(InString) do begin if Copy(InString, i, 2) = '\u' then begin NumString := Copy(InString, i+2, 4); CharNum := StrToIntDef('$' + NumString, 63); if CharNum < $FFFF then Result := Result + '&#' + IntToStr(CharNum) + ';'; i := i+6; end else begin Result := Result + InString[i]; inc(i); end; end; end; function JHexUCodeToWideString(InString: Widestring): WideString; //Converts \u____ JavaScript escapes to Unicode chars var NumString: string; i: integer; CharNum: integer; begin if (Pos('\u', InString) < 1) or (Length(InString) < 1) then begin Result := InString; Exit; end; i := 1; while i <= Length(InString) do begin if Copy(InString, i, 2) = '\u' then begin NumString := Copy(InString, i+2, 4); CharNum := StrToIntDef('$' + NumString, 63); if CharNum < $FFFF then Result := Result + WideChar(Word(CharNum)); i := i+6; end else begin Result := Result + InString[i]; inc(i); end; end; end; function PCCharToJSUnicode(InString: string): string; var i: integer; begin if Length(InString) < 1 then begin Result := ''; Exit; end; for i := 1 to Length(InString) do if Ord(InString[i]) > 127 then begin if (Ord(InString[i]) < 160) then Result := Result + '\u' + IntToHex(GetDecUnicodeNumFromWinNum(Ord(InString[i])), 4) else Result := Result + '\u' + IntToHex(Ord(InString[i]), 4); end else Result := Result + InString[i]; end; function WAllCharsToJSUnicode(InString: WideString): WideString; var i: LongInt; OutString: WideString; begin OutString := ''; for i := 1 to Length(InString) do OutString := OutString + '\u' + UpperCase(IntToHex(Ord(InString[i]), 4)); Result := OutString; end; function WAllCharsToHTMLHex(InString: WideString): WideString; var i: LongInt; OutString: WideString; begin OutString := ''; for i := 1 to Length(InString) do OutString := OutString + '&#x' + UpperCase(IntToHex(Ord(InString[i]), 4)) + ';'; Result := OutString; end; function PCCharToJSUnicodeEx(InString: string): string; //Includes all chars, not just those over 127 -- used for simple encryption var i: integer; begin Result := ''; if Length(InString) < 1 then begin Exit; end; for i := 1 to Length(InString) do begin if (Ord(InString[i]) < 160) and (Ord(InString[i]) > 127) then Result := Result + '\u' + IntToHex(GetDecUnicodeNumFromWinNum(Ord(InString[i])), 4) else Result := Result + '\u' + IntToHex(Ord(InString[i]), 4); end; end; function PCCharToHTMLNumeric(InString: string): string; //Includes all chars, not just those over 127 -- used for simple encryption var InTag: Boolean; //If we're inside a tag, do no conversion to avoid destroying HTML i: integer; begin Result := ''; InTag := False; if Length(InString) < 1 then begin Exit; end; for i := 1 to Length(InString) do begin if InString[i] = '<' then InTag := True; if not InTag then begin if (Ord(InString[i]) < 160) and (Ord(InString[i]) > 127) then Result := Result + '&#' + IntToStr(GetDecUnicodeNumFromWinNum(Ord(InString[i]))) + ';' else Result := Result + '&#' + IntToStr(Ord(InString[i])) + ';'; end else Result := Result + InString[i]; if InString[i] = '>' then InTag := False; end; end; function WTagsToLowerCase(InString: WideString): WideString; var i: integer; InTag: Boolean; InSQuotes: Boolean; InDQuotes: Boolean; begin Result := ''; if Length(InString) < 1 then Exit; InTag := False; InDQuotes := False; InSQuotes := False; for i := 1 to Length(InString) do begin if InString[i] = WideChar('>') then InTag := False; if (InTag = True) and (InString[i] = WideChar('>')) then InTag := False; if InTag = True then begin if InString[i] = WideChar('"') then InDQuotes := not InDQuotes; if InString[i] = WideChar('''') then InSQuotes := not InSQuotes; end; if InString[i] = WideChar('<') then InTag := True; if not InTag or (InDQuotes or InSQuotes) then Result := Result + InString[i] else Result := Result + WideLowerCase(InString[i]); end; end; function MakeRTLRightAlignedPara(InString: string): string; var WS: WideString; begin Result := InString; //Use the first and last char to determine RTL status WS := WideStringFromHTMLNumeric(InString); if (UnicodeIsRightToLeft(Cardinal(WS[1]))) and (UnicodeIsRightToLeft(Cardinal(WS[Length(WS)]))) then Result := '

' + InString + '

' end; function FirstNonPuncCharIsRTL(InString: WideString): Boolean; var i: integer; begin Result := False; if Length(InString) < 1 then Exit; i := 1; while (UnicodeIsPunctuation(Cardinal(InString[i]))) and (i < Length(InString)) do inc(i); Result := UnicodeIsRightToLeft(Cardinal(InString[i])); end; function LastNonPuncCharIsRTL(InString: WideString): Boolean; var i: integer; begin Result := False; if Length(InString) < 1 then Exit; i := Length(InString); while ((UnicodeIsPunctuation(Cardinal(InString[i]))) or (UnicodeIsMark(Cardinal(InString[i])))) and (i > 1) do dec(i); Result := UnicodeIsRightToLeft(Cardinal(InString[i])); end; function HasMoreRTLCharsThanLTRChars(InString: WideString): Boolean; var RTLChars: integer; i: integer; begin Result := False; if Length(InString) < 1 then Exit; RTLChars := 0; for i := 1 to Length(InString) do if UnicodeIsRightToLeft(Cardinal(InString[i])) then inc(RTLChars); Result := (RTLChars > (Length(InString)/2)); end; function TextIsMostLikelyRTL(InString: WideString): Boolean; begin Result := False; if ((FirstNonPuncCharIsRTL(InString)) and (LastNonPuncCharIsRTL(InString))) or (HasMoreRTLCharsThanLTRChars(InString)) then Result := True; end; function InsertRTLSpans(InString: WideString): WideString; var i: integer; InRTL: Boolean; InTag: Boolean; begin Result := ''; if Length(InString) < 1 then Exit; InTag := False; InRTL := UnicodeIsRightToLeft(Cardinal(InString[1])); if InRTL then Result := ''; for i := 1 to Length(InString) do begin if (InString[i] = '<') and (not InTag) then begin InTag := True; if InRTL then begin Result := Result + ''; InRTL := False; end; end; if InTag then Result := Result + InString[i] else begin if InRTL then begin if WUnicodeChangesDirectionality(True, InString[i]) then begin Result := Result + ''; InRTL := False; end; Result := Result + InString[i] end else begin if UnicodeIsRightToLeft(Cardinal(InString[i])) then begin InRTL := True; Result := Result + ''; end; Result := Result + InString[i]; end; end; if (InString[i] = '>') and (InTag) then InTag := False; end; if InRTL then Result := Result + ''; end; function ProcessForRTLBoundaries(InString: WideString; Centred: Boolean): WideString; var CSSClass: WideString; begin Result := InString; if Centred then CSSClass := 'CentredRTLText' else CSSClass := 'RTLText'; if Length(InString) < 1 then Exit; if FirstNonPuncCharIsRTL(InString) then begin if LastNonPuncCharIsRTL(InString) then begin Result := '

' + InString + '

'; Exit; end else begin if HasMoreRTLCharsThanLTRChars(InString) then begin Result := '

' + InString + '

'; Exit; end else begin Result := InsertRTLSpans(InString); Exit; end; end; end else begin if LastNonPuncCharIsRTL(InString) then begin if HasMoreRTLCharsThanLTRChars(InString) then begin Result := '

' + InString + '

'; Exit; end else begin Result := InsertRTLSpans(InString); Exit; end; end else begin Result := InsertRTLSpans(InString); end; end; end; function TextIsMostLikelyCJK(InString: WideString): Boolean; var i: integer; FirstChar, LastChar: WideChar; begin Result := False; if Length(InString) < 1 then Exit; i := 1; while (i<=Length(InString)) and (WUnicodeIsSpacePunc(InString[i])) do Inc(i); FirstChar := InString[i]; i := Length(InString); while (i>1) and (WUnicodeIsSpacePunc(InString[i])) do Dec(i); LastChar := InString[i]; Result := (WUnicodeIsCJK(FirstChar) and WUnicodeIsCJK(LastChar)); end; function ParseWideStringToPerceivedChars(var CharList: TWideStringList; WInString: WideString; RemoveAscii: Boolean): Boolean; //This function takes a widestring text and splits it into "perceived characters", each //assigned to one position in the list. "Perceived" means that each character will //have its own position, unless it has following combining diacritical marks; in that //case, the char + diacritics will be grouped together. var i: integer; CharNum: Cardinal; CodeBlock: TUnicodeBlock; PercChar: WideString; Len: integer; WS: WideString; begin //Default return Result := False; try //Clear the list CharList.Clear; //Exit if no data Len := Length(WInString); if Len < 1 then Exit; //Set up the list // CharList.Duplicates := dupIgnore; //Can't do this -- it ignores lowercase/uppercase //Strip returns etc. WS := ''; for i := 1 to Len do begin CharNum := Cardinal(WInString[i]); if not (CharNum in [13,10]) then if (not UnicodeIsLineSeparator(CharNum)) and (not UnicodeIsParagraphSeparator(CharNum)) then WS := WS + WInString[i]; end; Len := Length(WS); //Go through the chars i := 1; while i < Len do begin PercChar := WS[i]; inc(i); if (i>Len) then Break; CodeBlock := CodeBlockFromChar(UCS4(WS[i])); while ((CodeBlock in [ubCombiningDiacriticalMarks,ubCombiningMarksForSymbols,ubCombiningHalfMarks]) and (i<=Len)) do begin PercChar := PercChar + WS[i]; inc(i); CodeBlock := CodeBlockFromChar(UCS4(WS[i])); end; if not RemoveAscii then CharList.Append(PercChar) else if (Length(PercChar) > 1) or (PercChar[1] > #$007F) then CharList.Append(PercChar); PercChar := ''; end; //Add the last one if necessary if i <= Len then if WS[Len] > #$007F then CharList.Append(WideString(WS[Len])); //Return success if any entries added Result := (CharList.Count > 0); except Result := False; end; end; //Shows returns in a string by inserting a character #172 before each one function ShowReturns(InString: string): string; var i: integer; begin Result := ''; if Length(InString) < 1 then Exit; for i := 1 to Length(InString) do if InString[i] = #13 then Result := Result + #172#13 else Result := Result + InString[i]; end; function UniqueIDFromTime: string; begin Result := FloatToStr(Now); Result := ReplaceStuff('.', '', Result); Result := ReplaceStuff(',', '', Result); end; procedure CopyUnicodeTextToClipboard(InText: WideString); var DataHandle : THandle; FromPointer: Pointer; ToPointer : Pointer; WS : WideString; begin WS := InText; SetLength(WS, Length(WS)+1); WS[Length(WS)] := WideChar($0000); // Null-terminator DataHandle := GlobalAlloc(GMEM_DDESHARE OR GMEM_MOVEABLE, Length(WS)*SizeOf(WChar)); try ToPointer := GlobalLock(DataHandle); //NOTE: THE FOLLOWING LINE LOOKS WRONG! SHOULD IT BE ...@WS[1]? FromPointer := @InText[1]; Move(FromPointer^, ToPointer^, Length(WS)*SizeOf(WChar)); finally GlobalUnlock(DataHandle); end; OpenClipboard(Application.Handle); // EmptyClipboard; SetClipboardData(CF_UNICODETEXT, DataHandle); CloseClipboard; end; function WGetUnicodePunctuation(InString: WideString): string; var i: integer; Temp: string; begin Result := ''; if Length(InString) < 1 then Exit; for i := 1 to Length(InString) do if UnicodeIsPunctuation(Cardinal(InString[i])) then begin Temp := '\u' + IntToHex(Ord(InString[i]), 4); if Pos(Temp, Result) < 1 then Result := Result + Temp; end; end; function WGetUnicodeOpenPunctuation(InString: WideString): string; var i: integer; Temp: string; begin Result := ''; if Length(InString) < 1 then Exit; //Include ordinary quotes as well as initial and open, because these can be initial for i := 1 to Length(InString) do if (UnicodeIsInitialPunctuation(Cardinal(InString[i]))) or (UnicodeIsOpenPunctuation(Cardinal(InString[i]))) or (UnicodeIsQuotationMark(Cardinal(InString[i]))) then begin Temp := '\u' + IntToHex(Ord(InString[i]), 4); if Pos(Temp, Result) < 1 then Result := Result + Temp; end; end; function WUnicodeIsSpacePunc(InChar: WideChar): Boolean; begin Result := UnicodeIsPunctuation(Cardinal(InChar)) or UnicodeIsSpace(Cardinal(InChar)) or (InChar in [WideChar(#13), WideChar(#10)]); end; function WUnicodeIsReturn(InChar: WideChar): Boolean; begin Result := (UnicodeIsLineSeparator(Cardinal(InChar)) or (UnicodeIsParagraphSeparator(Cardinal(InChar)))) or (InChar in [WideChar(#13), WideChar(#10)]); end; function WUnicodeIsCJK(InChar: WideChar): Boolean; begin Result := CodeBlockFromChar(UCS4(InChar)) in [ubCJKRadicalsSupplement..ubHangulSyllables, ubCJKCompatibilityIdeographs, ubCJKCompatibilityForms, ubCJKUnifiedIdeographsExtensionB, ubCJKCompatibilityIdeographsSupplement]; end; function WUnicodeIsCombining(InChar: WideChar): Boolean; begin //Tweaked to comment out the second condition 26/04/04 -- no known //reason for it, and it breaks other functions! Result := UnicodeIsMark(Cardinal(InChar)){ or (Ord(InChar) < 256)}; end; function WUnicodeChangesDirectionality(RTL: Boolean; InChar: WideChar): Boolean; begin if (UnicodeIsRightToLeft(Cardinal(InChar)) = RTL) or (WUnicodeIsCombining(InChar)) or (WUnicodeIsSpacePunc(InChar)) then Result := False else Result := True; end; function BuildWebKeypad(InString: WideString; ProcessForRTL: Boolean): string; //This function is contains an adaptation of the StringFunctions //ParseWideStringToPerceivedChars function -- they may be merged at some stage var CharList: TTntStringList; WText: WideString; i,k: integer; CharNum: Cardinal; //CodeBlock: Cardinal; PercChar: WideString; Len: integer; WS: WideString; AlreadyInList: Boolean; IsMark: Boolean; begin Result := ''; CharList := TTntStringList.Create; try CharList.Sorted := True; CharList.Duplicates := dupAccept; //dupIgnore doesn't work!!!! Thinks all sorts of chars are identical! WText := WideTrim(InString); //Exit if no data Len := Length(WText); if Len < 1 then Exit; //Strip returns etc. WS := ''; for i := 1 to Len do begin CharNum := Cardinal(WText[i]); if not (CharNum in [13,10]) then if ((not UnicodeIsLineSeparator(CharNum)) and (not UnicodeIsParagraphSeparator(CharNum)) and (not UnicodeIsWhiteSpace(CharNum))) then WS := WS + WText[i]; end; Len := Length(WS); //Go through the chars i := 1; while i < Len do begin PercChar := WS[i]; inc(i); if (i>Len) then Break; {//This code modified for HotPot 6.0.2.4; Arabic combining diacritics don't test true //using this system, so UnicodeIsMark used instead CodeBlock := CodeBlockFromChar(WS[i]); while ((CodeBlock in [6,31,62]) and (i<=Len)) do begin PercChar := PercChar + WS[i]; inc(i); CodeBlock := CodeBlockFromChar(WS[i]); end; } IsMark := UnicodeIsMark(Cardinal(WS[i])); while ((IsMark) and (i<=Len)) do begin PercChar := PercChar + WS[i]; inc(i); IsMark := UnicodeIsMark(Cardinal(WS[i])); end; if (Length(PercChar) > 1) or (PercChar[1] > #$007F) then begin if CharList.Count > 0 then begin AlreadyInList := False; for k := 0 to CharList.Count - 1 do if WideStringToHTMLHex(CharList[k]) = WideStringToHTMLHex(PercChar) then AlreadyInList := True; if not AlreadyInList then CharList.Add(PercChar); end else begin CharList.Add(PercChar); end; end; PercChar := ''; end; //Add the last one if necessary if i <= Len then if WS[Len] > #$007F then begin PercChar := WideString(WS[Len]); if CharList.Count > 0 then begin AlreadyInList := False; for k := 0 to CharList.Count - 1 do if WideStringToHTMLHex(CharList[k]) = WideStringToHTMLHex(PercChar) then AlreadyInList := True; if not AlreadyInList then CharList.Add(PercChar); end else begin CharList.Add(PercChar); end; end; //Bail if empty if CharList.Count < 1 then Exit; //Build the keypad for i := 0 to CharList.Count - 1 do begin Result := Result + ' '; //Add a space to allow wrapping in Safari! end; finally CharList.Free; end; end; function WConvertUnmatchedAngleBrackets(InString: WideString): WideString; var i: integer; InTag: Boolean; OutString: WideString; WOpen, WClose: WideChar; begin Result := ''; if Length(InString) < 1 then Exit; WOpen := '<'; WClose := '>'; InTag := False; //Go from the beginning, looking for unmatched for i := 1 to Length(InString) do if InTag then begin if InString[i] = WClose then InTag := False; OutString := OutString + InString[i]; end else begin if InString[i] = WClose then OutString := OutString + '>' else begin if InString[i] = WOpen then InTag := True; OutString := OutString + InString[i]; end; end; //Now go from the end, repeating the process InString := OutString; OutString := ''; InTag := False; for i := Length(InString) downto 1 do if InTag then begin if InString[i] = WOpen then InTag := False; OutString := InString[i] + OutString; end else begin if InString[i] = WOpen then OutString := '<' + OutString else begin if InString[i] = WClose then InTag := True; OutString := InString[i] + OutString; end; end; Result := OutString; end; {This function parses out the tag name, tag content, and attribute values in a WideString XML tag passed to it, and stores the results in a TTntStringList as Name/Value pairs so the values can easily be looked up by the calling function. It stores the tag name and content with special name structures so they are not mistaken for attributes.} function WParseTagOrAttributes(InString: WideString; uslPairs: TTntStringList): Boolean; var i: integer; TagName: WideString; TagContent: WideString; Atts: WideString; CloseTagPos: integer; QuoteChar: WideChar; NameVal: WideString; InsideAtt: Boolean; //TODO: THIS WILL FAIL WITH SELF-CLOSING TAGS!!!!!!!!! function IsTagnameTerminator(InChar: WideChar): Boolean; begin Result := (InChar in [WideChar(' '), WideChar('>'), WideChar('/')]); end; function IsAttsTerminator(InChar: WideChar): Boolean; begin Result := (InChar in [WideChar('>')]); end; begin Result := False; try if Length(InString) < 5 then Exit; uslPairs.Clear; uslPairs.NameValueSeparator := '='; //Initialize everything InString := WideTrim(InString); TagName := ''; Atts := ''; TagContent := ''; //First, is it a tag or just attributes? if InString[1] = '<' then begin //The tag name is everything up to the first space or close angle bracket i := 2; while ((not IsTagnameTerminator(InString[i])) and (i < Length(InString))) do begin TagName := TagName + InString[i]; inc(i); end; //Add the tagname to the list uslPairs.Add('__tagname__=' + TagName); //Get the attributes and tag content if there are any if InString[i] = WideChar(' ') then begin //There are attributes and everything up to the close bracket or slash is part of them Atts := ''; inc(i); while ((not IsAttsTerminator(InString[i])) and (i < Length(InString))) do begin Atts := Atts + InString[i]; inc(i); end; end; //Now get the content and stash it, if there is any if Pos('>', InString) < Length(InString) then begin i := Pos('>', InString) + 1; CloseTagPos := Pos('', InString); if CloseTagPos > i then begin TagContent := Copy(InString, i, CloseTagPos-i); uslPairs.Add('__tagcontent__=' + TagContent); end; end; end else begin //This whole thing is attributes Atts := InString; end; if Length(Atts) > 4 then begin //First, find out the quotechar by checking for the first instance of " or ' QuoteChar := '?'; i := 1; while (QuoteChar = '?') and (i < Length(Atts)) do if Atts[i] in [WideChar(''''), WideChar('"')] then QuoteChar := Atts[i] else inc(i); if QuoteChar = '?' then begin Result := True; Exit; //There are no valid attribute values. end; //Now split the name/value pairs by breaking on every instance of a quotechar //preceding a whitespace NameVal := ''; InsideAtt := False; for i := 1 to Length(Atts) do begin if Atts[i] <> QuoteChar then begin NameVal := NameVal + Atts[i]; end else begin if InsideAtt then //this is the end of a name-val pair begin uslPairs.Add(WideTrim(NameVal)); NameVal := ''; InsideAtt := False; end else //This is the first element in a name-val begin InsideAtt := True; end; end; end; end; Result := True; except Result := False; end; end; //This function assesses the likelihood that a string is actually a Web URL, and //returns true if it seems likely. function WStrIsLikelyURL(InString: WideString): Boolean; var Likelihood: integer; begin Likelihood := 0; //starting point InString := WideTrim(InString); //First, check for a protocol near the beginning if Pos('://', InString) < 7 then inc(Likelihood); //Look at the extension, if there is one if Pos(WideLowerCase(WideExtractFileExt(InString)), WebExts) > 0 then inc(Likelihood); //See if the last char is a slash if InString[Length(InString)] = WideChar('/') then inc(Likelihood); //Return true for any hit -- may be too permissive, but relative links can be //very simple Result := (Likelihood > 0); end; function IsWholeWordHit(wsText: WideString; StartPos, EndPos: integer): Boolean; begin Result := False; //default if ((StartPos = 1) or (WUnicodeIsSpacePunc(wsText[StartPos-1]))) then if ((EndPos = Length(wsText)) or (WUnicodeIsSpacePunc(wsText[EndPos+1]))) then Result := True; end; function ParseAppVersionFromWideString(wsInput: WideString; var verMajor: integer; var verMinor: integer; var verBuild: integer; var verRelease: integer; var wsOutput: WideString): Boolean; var wsTemp: WideString; i: integer; InDelimiter: Boolean; NumsFound: integer; begin Result := False; //default wsInput := WideTrim(wsInput); //Set defaults verMajor := 0; verMinor := 0; verBuild := 0; verRelease := 0; wsTemp := ''; InDelimiter := wsInput[1] in [WideChar('0')..WideChar('9')]; NumsFound := 0; wsOutput := ''; //Split the string based on any delimiter which is not a number if Length(wsInput) > 0 then begin for i := 1 to Length(wsInput) do begin if wsInput[i] in [WideChar('0')..WideChar('9')] then begin InDelimiter := False; wsTemp := wsTemp + wsInput[i]; end else begin if (not InDelimiter) and (Length(wsTemp) > 0) then begin inc(NumsFound); Result := True; Case NumsFound of 1: begin verMajor := StrToIntDef(wsTemp, 0); wsOutput := wsTemp; end; 2: begin verMinor := StrToIntDef(wsTemp, 0); wsOutput := wsOutput + '.' + wsTemp; end; 3: begin verBuild := StrToIntDef(wsTemp, 0); wsOutput := wsOutput + '.' + wsTemp; end; 4: begin verRelease := StrToIntDef(wsTemp, 0); wsOutput := wsOutput + '.' + wsTemp; end; end; wsTemp := ''; InDelimiter := True; end; end; end; if Length(wsTemp) > 0 then begin inc(NumsFound); Case NumsFound of 1: begin verMajor := StrToIntDef(wsTemp, 0); wsOutput := wsTemp; end; 2: begin verMinor := StrToIntDef(wsTemp, 0); wsOutput := wsOutput + '.' + wsTemp; end; 3: begin verBuild := StrToIntDef(wsTemp, 0); wsOutput := wsOutput + '.' + wsTemp; end; 4: begin verRelease := StrToIntDef(wsTemp, 0); wsOutput := wsOutput + '.' + wsTemp; end; end; end; end; end; function FileHasUTF8Header(const FileName: WideString): Boolean; var sList: TStringList; begin Result := False; //default sList := TStringList.Create; try sList.LoadFromFile(FileName); if HasUTF8Header(sList.Text) then Result := True; finally sList.Free; end; end; //This function is designed to search an 8-bit string for any //sequences that suggest it should be treated as UTF-8. function HasUTF8Header(InString: string): Boolean; var strTemp: string; begin Result := False; //Default //Trim the string InString := Trim(InString); //Look for an XML header if Copy(InString, 1, 5) = '', InString)); strTemp := UpperCase(strTemp); Result := True; // default encoding is UTF-8 in XML if Pos('ENCODING', strTemp) > 0 then if Pos('UTF-8', strTemp) < 0 then Result := False; //Alternative encoding is specified Exit; end; //Maybe it's an HTML file InString := UpperCase(InString); strTemp := GetTextBetween(InString, ''); if Length(strTemp) > 0 then if Pos('CHARSET', strTemp) > 0 then if Pos('UTF-8', strTemp) > Pos('CHARSET', strTemp) then Result := True; end; //This function analyses the bytes in a file looking for likely //sequences that indicate that the file may be UTF-8. It's based //on a suggested algorithm here: //http://mail.nl.linux.org/linux-utf8/1999-09/msg00110.html //It's not perfect; little-endian Unicode triggers a True result, so //check for a BOM first before relying on this! function FileMayBeUTF8(FileName: WideString): Boolean; var Stream: TMemoryStream; BytesRead: integer; ArrayBuff: array[0..127] of byte; PreviousByte: byte; i: integer; YesSequences, NoSequences: integer; begin if not WideFileExists(FileName) then Exit; YesSequences := 0; NoSequences := 0; Stream := TMemoryStream.Create; try Stream.LoadFromFile(FileName); repeat {read from the TMemoryStream} BytesRead := Stream.Read(ArrayBuff, High(ArrayBuff) + 1); {Do the work on the bytes in the buffer} if BytesRead > 1 then begin for i := 1 to BytesRead-1 do begin PreviousByte := ArrayBuff[i-1]; if ((ArrayBuff[i] and $c0) = $80) then begin if ((PreviousByte and $c0) = $c0) then begin inc(YesSequences) end else begin if ((PreviousByte and $80) = $0) then inc(NoSequences); end; end; end; end; until (BytesRead < (High(ArrayBuff) + 1)); //Below, >= makes ASCII files = UTF-8, which is no problem. //Simple > would catch only UTF-8; Result := (YesSequences >= NoSequences); finally Stream.Free; end; end; //There's something wrong with this function. It throws up an abstract error //when trying to load the file. No idea why yet, so don't use it! function LoadFileCarefullyToWideString(FileName: WideString): WideString; var TntStrings: TTntStrings; Stream: TTntFileStream; FileCharSet: TTntStreamCharSet; begin TntStrings := TTntStrings.Create; try Stream := TTntFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite); try //First, check for charset in the normal way FileCharSet := AutoDetectCharacterSet(Stream); Stream.Position := 0; //If it comes back ansi, we have to be careful if FileCharSet = csAnsi then begin if FileMayBeUTF8(FileName) then begin TntStrings.AnsiStrings.LoadFromStreamEx(Stream, CP_UTF8); end else begin {Now we should try reading the file for any other clues. It's likely, though, that if the above test failed and yet it says it's UTF-8, it's actually not; but we have to honour the declaration, I think. } if FileHasUTF8Header(FileName) then TntStrings.AnsiStrings.LoadFromStreamEx(Stream, CP_UTF8) else TntStrings.AnsiStrings.LoadFromStream(Stream); end; end else begin //It's got a BOM, so we can load it in the normal way TntStrings.LoadFromStream(Stream); end; finally Stream.Free; end; Result := TntStrings.Text; finally TntStrings.Free; end; end; function GetFileCharSet(FileName: WideString): TTntStreamCharSet; var Stream: TTntFileStream; begin Stream := TTntFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite); try //First, check for charset in the normal way Result := AutoDetectCharacterSet(Stream); finally Stream.Free; end; end; //This takes a wide string, and if it's longer than MaxLen, truncates it //and adds an ellipsis. function WTruncWSToMaxLen(InString: WideString; MaxLen: integer): WideString; begin Result := InString; if MaxLen < 1 then Exit; if Length(InString) <= MaxLen then Exit else begin if MaxLen >=4 then begin Result := Copy(InString, 1, MaxLen-3) + WideString('...'); end else Result := Copy(InString, 1, MaxLen); end; end; end.