Up

Module Escaping

Operations for escaping and unescaping strings, with parameterized escape and escapeworthy characters. Escaping/unescaping using this module is more efficient than using Pcre. Benchmark code can be found in core/benchmarks/string_escaping.ml.

Signature

val escape_gen_exn : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Staged.t

escape_gen_exn ~escapeworthy_map ~escape_char returns a function that will escape a string s as follows: if (c1,c2) is in escapeworthy_map, then all occurrences of c1 are replaced by escape_char concatenated to c2.

Raises an exception if escapeworthy_map is not one-to-one. If escape_char is not in escapeworthy_map, then it will be escaped to itself.

val escape_gen : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Or_error.t
val escape : escapeworthy:char list -> escape_char:char -> (string -> string) Staged.t

escape ~escapeworthy ~escape_char is equivalent to


        escape_gen_exn ~escapeworthy_map:(List.zip_exn escapeworthy escapeworthy)
          ~escape_char
      

Duplicates and escape_char will be removed from escapeworthy, so no exception will be raised.

val unescape_gen_exn : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Staged.t

unescape_gen_exn is the inverse operation of escape_gen_exn. That is,


      let escape = Staged.unstage (escape_gen_exn ~escapeworthy_map ~escape_char) in
      let unescape = Staged.unstage (unescape_gen_exn ~escapeworthy_map ~escape_char) in
      assert (s = unescape (escape s))
      

always succeeds when escapeworthy_map does not cause an exception to be raised.

val unescape_gen : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Or_error.t
val unescape : escape_char:char -> (string -> string) Staged.t

unescape ~escape_char is defined as unescape_gen_exn ~escapeworthy_map:[] ~escape_char

val is_char_escaping : string -> escape_char:char -> int -> bool

Any char in an escaped string is either escaping, escaped, or literal. For example, for the escaped string "0_a0__0" with escape_char '_', positions 1 and 4 are escaping, 2 and 5 are escaped, and the rest are literal.

is_char_escaping s ~escape_char pos returns true if the char at pos is escaping, false otherwise.

val is_char_escaped : string -> escape_char:char -> int -> bool

is_char_escaped s ~escape_char pos returns true if the char at pos is escaped, false otherwise.

val is_char_literal : string -> escape_char:char -> int -> bool

is_char_literal s ~escape_char pos returns true if the char at pos is neither escaped nor escaping.

val index : string -> escape_char:char -> char -> int option

index s ~escape_char char finds the first literal (not escaped) instance of char in s starting from 0.

val index_exn : string -> escape_char:char -> char -> int
val rindex : string -> escape_char:char -> char -> int option

rindex s ~escape_char char finds the first literal (not escaped) instance of char in s starting from the end of s and proceeding towards 0.

val rindex_exn : string -> escape_char:char -> char -> int
val index_from : string -> escape_char:char -> int -> char -> int option

index_from s ~escape_char pos char finds the first literal (not escaped) instance of char in s starting from pos and proceeding towards the end of s.

val index_from_exn : string -> escape_char:char -> int -> char -> int
val rindex_from : string -> escape_char:char -> int -> char -> int option

rindex_from s ~escape_char pos char finds the first literal (not escaped) instance of char in s starting from pos and proceeding towards 0.

val rindex_from_exn : string -> escape_char:char -> int -> char -> int
val split : string -> on:char -> escape_char:char -> string list

split s ~escape_char ~on

Returns a list of substrings of s that are separated by literal versions of on. Consecutive on characters will cause multiple empty strings in the result. Splitting the empty string returns a list of the empty string, not the empty list.
e.g. split ~escape_char:'_' ~on:',' "foo,bar_,baz" = ["foo"; "bar_,baz"]
val split_on_chars : string -> on:char list -> escape_char:char -> string list

split_on_chars s ~on ~escape_char

Returns a list of all substrings of s that are separated by one of the literal chars from on. The chars in on are not grouped, so a run of consecutive on chars in the source string will produce multiple empty strings in the result.
e.g. split_on_chars ~escape_char:'_' ~on:[',';'|'] "foo_|bar,baz|0" = ["foo_|bar"; "baz"; "0"]
val lsplit2 : string -> on:char -> escape_char:char -> (string * string) option

lsplit2 s ~on ~escape_char splits s into a pair on the first literal instance of on (meaning the first unescaped instance) starting from the left.

val lsplit2_exn : string -> on:char -> escape_char:char -> string * string
val rsplit2 : string -> on:char -> escape_char:char -> (string * string) option

rsplit2 s ~on ~escape_char splits s into a pair on the first literal instance of on (meaning the first unescaped instance) starting from the right.

val rsplit2_exn : string -> on:char -> escape_char:char -> string * string