Module Delimited.Read

Read CSVs & CSV-like delimited formats (following the CSV quoting behaviour).

These formats are loosely documented by RFC 4180: https://www.ietf.org/rfc/rfc4180.txt

include module type of Delimited_kernel.Read
exception Bad_csv_formatting of string list * string

The first argument is the row up to the error, and the second is the field with the error up to the point of failure. Same as Expert.Parse_state.Bad_csv_formatting.

type 'a t

This provides an applicative interface for constructing values from a csv file.

An 'a t describes how to build an OCaml model 'a for each row.

Simple example:

type t =
  { foo : int
  ; bar : string
  }

(* Describes how to generate a [t] from a row of a csv file *)
let parse : t Delimited_kernel.Parse.t =
  let open Delimited_kernel.Parse.Let_syntax in
  let%map_open foo = at_header "foo" ~f:Int.of_string
  and bar = at_header "bar" ~f:String.of_string in
  { foo; bar }
;;

let _ =
  Delimited_kernel.Parse.list_of_string ~header:`Yes parse
    "foo,bar\n2,\"hello, world\"\n"
;;
include Core_kernel.Applicative.S with type 'a t := 'a t
include Base__.Applicative_intf.For_let_syntax
type 'a t
val return : 'a -> 'a t
val map : 'a t -> f:('a -> 'b) -> 'b t
val both : 'a t -> 'b t -> ('a * 'b) t
include Base__.Applicative_intf.Applicative_infix with type 'a t := 'a t
type 'a t
val (<*>) : ('a -> 'b) t -> 'a t -> 'b t

same as apply

val (<*) : 'a t -> unit t -> 'a t
val (*>) : unit t -> 'a t -> 'a t
val (>>|) : 'a t -> ('a -> 'b) -> 'b t
val apply : ('a -> 'b) t -> 'a t -> 'b t
val map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t
val map3 : 'a t -> 'b t -> 'c t -> f:('a -> 'b -> 'c -> 'd) -> 'd t
val all : 'a t list -> 'a list t
val all_unit : unit t list -> unit t
val all_ignore : unit t list -> unit t
module Let_syntax : sig ... end
val at_index : int -> f:(string -> 'a) -> 'a t

Read a field at the given index. Use f to convert the field from string.

val at_header : string -> f:(string -> 'a) -> 'a t

Read a field at the given header. Use f to convert the field from string.

Note that if the given header is not provided through either the file or the ~header argument to the parsers, this will fail at runtime.

val at_header_opt : string -> f:(string option -> 'a) -> 'a t

Read a field at the given header, if it exists. Use f to convert the optional field (a string option) into the desired value.

module Header = Delimited_kernel__.Header

Header parsing control

module Row : sig ... end

Whole-row parsing.

val fold_string : ?⁠strip:bool -> ?⁠sep:char -> ?⁠quote:[ `No_quoting | `Using of char ] -> ?⁠header:Header.t -> ?⁠on_invalid_row:'a On_invalid_row.t -> 'a t -> init:'b -> f:('b -> 'a -> 'b) -> string -> 'b

Fold the CSV rows contained in the given string.

val list_of_string : ?⁠strip:bool -> ?⁠sep:char -> ?⁠quote:[ `No_quoting | `Using of char ] -> ?⁠header:Header.t -> ?⁠on_invalid_row:'a On_invalid_row.t -> 'a t -> string -> 'a list

Load the CSV rows contained in the given string as a list.

module Expert = Delimited_kernel.Read.Expert

Experts only. If you really think you need a function in this module, please talk to a delimited dev first.

val fold_reader : ?⁠strip:bool -> ?⁠skip_lines:int -> ?⁠sep:char -> ?⁠quote:[ `No_quoting | `Using of char ] -> ?⁠header:Header.t -> ?⁠on_invalid_row:'a On_invalid_row.t -> 'a t -> init:'b -> f:('b -> 'a -> 'b Async.Deferred.t) -> Async.Reader.t -> 'b Async.Deferred.t

fold_reader ?strip ?skip_lines ?sep ?quote t ~init ~f r produces a value by folding over a CSV document read from r, using t to build a value from each row. The reader will be closed on EOF.

If strip is true, leading and trailing whitespace is stripped from each field. Default value is false.

If skip_lines > 0, that many lines are skipped at the start of the input. Note that this skips lines without doing any CSV parsing of the lines being skipped, so newlines within a quoted field are treated identically to newlines outside a quoted field. Default value is 0.

sep is the character that separates fields within a row. Default value is ','

quote defines a character to use for quoting. `Using '"' implements the MS Excel convention: either a field is unquoted, or it has leading and trailing quotes and internal escaped characters are represented as quote-char char, e.g., "\n to escape a newline. `No_quoting means all characters are literal. The default is `Using '"'

val fold_reader' : ?⁠strip:bool -> ?⁠skip_lines:int -> ?⁠sep:char -> ?⁠quote:[ `No_quoting | `Using of char ] -> ?⁠header:Header.t -> ?⁠on_invalid_row:'a On_invalid_row.t -> 'a t -> init:'b -> f:('b -> 'a Core.Queue.t -> 'b Async.Deferred.t) -> Async.Reader.t -> 'b Async.Deferred.t

fold_reader' ?strip ?skip_lines ?sep ?quote t ~init ~f r works similarly to fold_reader, except for the f argument. fold_reader' runs f on batches (queues) of parsed values rather than running f on each individual parsed value.

val fold_reader_without_pushback : ?⁠strip:bool -> ?⁠skip_lines:int -> ?⁠sep:char -> ?⁠quote:[ `No_quoting | `Using of char ] -> ?⁠header:Header.t -> ?⁠on_invalid_row:'a On_invalid_row.t -> 'a t -> init:'b -> f:('b -> 'a -> 'b) -> Async.Reader.t -> 'b Async.Deferred.t

Same as fold_reader, except that f returns its result directly rather than as a deferred, so the fold function does not exert pushback on the fold.

val pipe_of_reader : ?⁠strip:bool -> ?⁠skip_lines:int -> ?⁠sep:char -> ?⁠quote:[ `No_quoting | `Using of char ] -> ?⁠header:Header.t -> ?⁠on_invalid_row:'a On_invalid_row.t -> 'a t -> Async.Reader.t -> 'a Async.Pipe.Reader.t

pipe_of_reader t reader produces a pipe reader of parsed values.

val create_reader : ?⁠strip:bool -> ?⁠skip_lines:int -> ?⁠sep:char -> ?⁠quote:[ `No_quoting | `Using of char ] -> ?⁠header:Header.t -> ?⁠on_invalid_row:'a On_invalid_row.t -> 'a t -> string -> 'a Async.Pipe.Reader.t Async.Deferred.t

create_reader t filename opens a reader for the given filename and returns a pipe of its parsed values.