; Tests of sharp char expressions in ScriptFu

; This only tests:
;    "sharp character" #\<c>
;    "sharp character hex" #\x<x>
;    sharp expressions for whitespace
; See also:
;    sharp-expr.scm
;    sharp-expr-number.scm

; This also only tests a subset: the ASCII subset.
; See also: sharp-expr-unichar.scm

; #\<c> denotes a character constant where <c> is one character.
; The one character may be multiple bytes in UTF-8,
; but should appear in the display as a single glyph,
; but may appear as a box glyph for unichar chars outside ASCII.

; #\x<x> denotes a character constant where <x> is a sequence of hex digits.
; See mk_sharp_const()

; #\space #\newline #\return and #\tab also denote character constants.

; sharp backslash space "#\ " parses as a token and yields a char atom.
; See the code, there is a space here: " tfodxb\\"
; See the test below.

; #U+<x> notation for unichar character constants is not in ScriptFu

; Any sharp character followed by characters not described above
; MAY optionally be a sharp expression when a program
; uses the "sharp hook" by defining symbol *sharp-hook* .


;             sharp constants for whitespace

; codepoints tab 9, newline 10, return 13, space 32 (aka whitespace)
; TinyScheme and ScriptFu print these solitary unichars by a string representation,
; but only when they are not in a string!
; This subset of codepoints is ignored by the parser as whitespace.
; It is common for older scripts to use sharp expression constants for these codepoints.
(assert '(equal? (integer->char 9) #\tab))
(assert '(equal? (integer->char 10) #\newline))
(assert '(equal? (integer->char 13) #\return))
(assert '(equal? (integer->char 32) #\space))


;             sharp constant character

; Unicode codepoints in range [33, 126]
; e.g. the letter A, ASCII 65
(assert '(equal? (integer->char 65) #\A))
(assert '(char? #\A))
(assert '(atom? #\A))


; Tests of functions using a non-printing, control character ASCII
; Codepoint BEL \x7
; The control character is written in hex notation, #\x7.
; A literal control character after "#\" is invisible in most editors and is
; lost by any tool that strips control characters; what remains is "#\)",
; which reads as the right paren char and leaves the parens unbalanced.
(assert '(equal? (integer->char 7) #\x7))
(assert '(char? #\x7))
(assert '(atom? #\x7))

; string function takes sequence of chars
; The expected string is built from the codepoint, for the same reason as above.
(assert '(equal? (string #\x7) (string (integer->char 7))))


; Unicode codepoints [0-8][11-12][14-31]
; (less than 32 excepting tab 9, newline 10, return 13)
; The "non-printing" characters
; e.g. 7, the character that in ancient times rang a bell sound

; Upstream TinyScheme prints these differently from ScriptFu, as a string repr of the char,
; since TinyScheme default compiles with option "USE_ASCII_NAMES"
;>(integer->char 7)
;#\bel
;>(integer->char 127)
;#\del

; ScriptFu prints solitary Unichars
; for codepoints below 32 and also 127 differently than upstream TinyScheme.
; Except ScriptFu is same as TinyScheme for tab, space, newline, return codepoints.
; ScriptFu shows a glyph that is a box with a hex number.
; Formerly (before the fixes for this test plan) ScriptFu printed these like TinyScheme,
; by a sharp constant hex e.g. #\x1f for 31


;              Edge codepoint tests
; Tests of edge cases, near a code slightly different

; Codepoint US Unit Separator (31, x1f), edge case to 32, space
; Written in hex notation so the test does not depend on an invisible
; literal control character surviving in this file.
(assert '(equal? (integer->char 31) #\x1f))
; and the conversion holds in the other direction
(assert '(= (char->integer #\x1f) 31))

; codepoint 127 x7f (DEL), edge case to 128
(assert '(equal? (integer->char 127) #\x7f))


;              sharp constant hex character

; Sharp char expr hex denotes char atom
; But not the REPL printed representation of characters.

; is-a char
(assert '(char? #\x65))
; equals a sharp character: lower case e
(assert '(equal? #\x65 #\e))

; sharp char hex notation accepts a single hex digit
(assert '(char? #\x3))
; sharp char hex notation accepts two hex digits
(assert '(char? #\x33))

; edge case, max hex that fits in 8-bits
(assert '(char? #\xff))

; sharp char expr hex accepts three digits
; when they are leading zeroes
(assert '(char? #\x033))

; Otherwise, three digits not leading zeros
; are unicode.

; codepoint x3bb is a valid character (greek lambda)
; but is outside ASCII range.
; See sharp-expr-unichar.scm


;             sharp constant hex character: invalid unichar

; Unicode has a range, but sparsely populated with valid codes.
; Unicode is unsigned, range is [0,x10FFFF]
; Greatest valid codepoint is x10FFFF (to match UTF-16)
; Sparsely populated: some codepoints in range are not valid
; because they are incorrectly encoded using UTF-8 algorithm.
; (This is a paraphrase: please consult the standard.)

; These tests are not a complete test of UTF-8 compliance !!!

; All assert arguments below are quoted forms, like the other asserts in this file.

; Edge case: max valid codepoint
(assert '(equal? #\x10FFFF #\􏿿))

; Edge case: zero is considered a valid codepoint
; !!! Although also a string terminator.
(assert '(equal? (integer->char 0) #\x0))


;             sharp constants for delimiter characters

; These test the sharp constant notation for characters space and parens
; These are in the ASCII range

; !!! A space char in a sharp constant expr
(assert '(char? #\ ))
; Whose representation is a space character.
(assert '(string=? (atom->string #\ ) " "))

; !!! A right paren char in a sharp constant expr
; Note that backslash captures the first right paren:
; the parens do not appear to match.
(assert '(char? #\)))
; Ditto for left paren
(assert '(char? #\())

; !!! But easy for author to confuse the parser
; assert-error can't catch syntax errors.
; So can only test in the REPL.
; > (char? #\)
; Error: syntax error: expected right paren, found EOF

; #\# is the sharp or pound sign char
(assert '(char? #\#))
(assert '(string=? (atom->string #\# ) "#"))

; #\x is lower case x
(assert '(char? #\x))
(assert '(string=? (atom->string #\x ) "x"))

; see also integer2char.scm


;             Common misunderstandings or typos

; #\t is a character, lower case t

; It is not the denotation for truth.
(assert '(not (equal? #\t #t)))

; It is not the denotation for #\tab.
(assert '(not (equal? #\t #\tab)))

; It is a char
(assert '(char? #\t))

; Its string representation is lower case t character
(assert '(string=? (atom->string #\t) "t"))


; a number converted to string that is representation in base 16
; !!! This is not creating a Unichar.
; number->string with radix 16 only prints the hex digits of decimal 955;
; the result has no leading "\x" and is a string, not a Unichar.
(assert '(string=? (number->string 955 16) "3bb"))


;             Untestable sharp constant hex character

; The test framework can't cover these cases because they are syntax errors.
; In the REPL they yield "syntax: illegal sharp constant expression".

; A sharp constant hex containing a non-hex digit is an error:
; z is not in [a-f0-9]
; > #\xz
; Error: syntax: illegal sharp constant expression
; A warning "Hex literal has invalid digits" is also printed to stderr.