Module String.Escaping
Operations for escaping and unescaping strings, with parameterized escape and escapeworthy characters. Escaping/unescaping using this module is more efficient than using Pcre. Benchmark code can be found in core/benchmarks/string_escaping.ml.
val escape_gen_exn : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Staged.t
escape_gen_exn escapeworthy_map escape_char returns a function that will escape a string s as follows: if (c1, c2) is in escapeworthy_map, then all occurrences of c1 are replaced by escape_char concatenated to c2.
Raises an exception if escapeworthy_map is not one-to-one. If escape_char is not in escapeworthy_map, then it will be escaped to itself.
val escape_gen : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Or_error.t
val escape : escapeworthy:char list -> escape_char:char -> (string -> string) Staged.t
escape ~escapeworthy ~escape_char is escape_gen_exn ~escapeworthy_map:(List.zip_exn escapeworthy escapeworthy) ~escape_char.
Duplicates and escape_char will be removed from escapeworthy, so no exception will be raised.
val unescape_gen_exn : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Staged.t
unescape_gen_exn is the inverse operation of escape_gen_exn. That is,
let escape = Staged.unstage (escape_gen_exn ~escapeworthy_map ~escape_char) in
let unescape = Staged.unstage (unescape_gen_exn ~escapeworthy_map ~escape_char) in
assert (s = unescape (escape s))
always succeeds when escapeworthy_map does not cause an exception.
val unescape_gen : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Or_error.t
val unescape : escape_char:char -> (string -> string) Staged.t
unescape ~escape_char is defined as unescape_gen_exn ~escapeworthy_map:[] ~escape_char.
val is_char_escaping : string -> escape_char:char -> int -> bool
Any char in an escaped string is either escaping, escaped, or literal. For example, for the escaped string "0_a0__0" with escape_char as '_', pos 1 and 4 are escaping, 2 and 5 are escaped, and the rest are literal.
is_char_escaping s ~escape_char pos returns true if the char at pos is escaping, false otherwise.
val is_char_escaped : string -> escape_char:char -> int -> bool
is_char_escaped s ~escape_char pos returns true if the char at pos is escaped, false otherwise.
val is_char_literal : string -> escape_char:char -> int -> bool
is_char_literal s ~escape_char pos returns true if the char at pos is neither escaped nor escaping.
val index : string -> escape_char:char -> char -> int option
index s ~escape_char char finds the first literal (not escaped) instance of char in s, starting from 0.
val index_exn : string -> escape_char:char -> char -> int
val rindex : string -> escape_char:char -> char -> int option
rindex s ~escape_char char finds the first literal (not escaped) instance of char in s, starting from the end of s and proceeding towards 0.
val rindex_exn : string -> escape_char:char -> char -> int
val index_from : string -> escape_char:char -> int -> char -> int option
index_from s ~escape_char pos char finds the first literal (not escaped) instance of char in s, starting from pos and proceeding towards the end of s.
val index_from_exn : string -> escape_char:char -> int -> char -> int
val rindex_from : string -> escape_char:char -> int -> char -> int option
rindex_from s ~escape_char pos char finds the first literal (not escaped) instance of char in s, starting from pos and proceeding towards 0.
val rindex_from_exn : string -> escape_char:char -> int -> char -> int
val split : string -> on:char -> escape_char:char -> string list
split s ~escape_char ~on returns a list of substrings of s that are separated by literal versions of on. Consecutive on characters will cause multiple empty strings in the result. Splitting the empty string returns a list of the empty string, not the empty list.
E.g., split ~escape_char:'_' ~on:',' "foo,bar_,baz" = ["foo"; "bar_,baz"].
val split_on_chars : string -> on:char list -> escape_char:char -> string list
split_on_chars s ~on returns a list of all substrings of s that are separated by one of the literal chars from on. The chars in on are not grouped, so a run of consecutive on chars in the source string will produce multiple empty strings in the result.
E.g., split_on_chars ~escape_char:'_' ~on:[',';'|'] "foo_|bar,baz|0" = ["foo_|bar"; "baz"; "0"].
val lsplit2 : string -> on:char -> escape_char:char -> (string * string) option
lsplit2 s ~on ~escape_char splits s into a pair on the first literal instance of on (meaning the first unescaped instance) starting from the left.
val lsplit2_exn : string -> on:char -> escape_char:char -> string * string
val rsplit2 : string -> on:char -> escape_char:char -> (string * string) option
rsplit2 s ~on ~escape_char splits s into a pair on the first literal instance of on (meaning the first unescaped instance) starting from the right.
val rsplit2_exn : string -> on:char -> escape_char:char -> string * string
val lstrip_literal : ?drop:(char -> bool) -> t -> escape_char:char -> t
These are the same as lstrip, rstrip, and strip for generic strings, except that they only drop literal characters -- they do not drop characters that are escaping or escaped. This makes sense if you're trying to get rid of junk whitespace (for example), because escaped whitespace seems more likely to be deliberate and not junk.