SAM files. Documentation here assumes familiarity with the SAM
specification.
module Biocaml_sam: sig
type header_item_tag = private [< `CO | `HD | `Other of string | `PG | `RG | `SQ ]
type tag_value = private string * string
type sort_order = [ `Coordinate | `Query_name | `Unknown | `Unsorted ]
type header_line = private {
version : string;
(* VN *)
type ref_seq = private {
name : string;
(* SN *)
length : int;
(* LN *)
assembly : string option;
(* AS *)
md5 : string option;
(* M5 *)
species : string option;
(* SP *)
uri : string option;
(* UR *)
type platform = [ `Capillary
| `Helicos
| `Illumina
| `Ion_Torrent
| `LS454
| `Pac_Bio
| `Solid ]
type read_group = private {
id : string;
(* ID *)
seq_center : string option;
(* CN *)
description : string option;
(* DS *)
run_date : [ `Date of Core.Std.Date.t | `Time of Core.Std.Time.t ] option;
(* DT *)
flow_order : string option;
(* FO *)
key_seq : string option;
(* KS *)
library : string option;
(* LB *)
program : string option;
(* PG *)
predicted_median_insert_size : int option;
(* PI *)
platform_unit : string option;
(* PU *)
sample : string option;
(* SM *)
type program = private {
id : string;
(* ID *)
name : string option;
(* PN *)
command_line : string option;
(* CL *)
previous_id : string option;
(* PP *)
description : string option;
(* DS *)
version : string option;
(* VN *)
type header_item = private [< `CO of string
| `HD of header_line
| `Other of string * tag_value list
| `PG of program
| `RG of read_group
| `SQ of ref_seq ]
type header = private {
version : string option;
sort_order : sort_order option;
ref_seqs : ref_seq list;
read_groups : read_group list;
programs : program list;
comments : string list;
others : (string * tag_value list) list;
sort_order: Guaranteed to be None if version = None.
ref_seqs: List of @SQ items. Order matters; it dictates
alignment sorting order when sort_order = `Coordinate.
read_groups: Unordered list of @RG items.
programs: List of @PG lines. Currently unordered, but we should
topologically sort.
comments: Unordered list of @CO lines.
val empty_header : header
module Flags: sig
type t = private int
val has_multiple_segments : t -> bool
val each_segment_properly_aligned : t -> bool
val segment_unmapped : t -> bool
val next_segment_unmapped : t -> bool
val seq_is_reverse_complemented : t -> bool
val next_seq_is_reverse_complemented : t -> bool
val first_segment : t -> bool
val last_segment : t -> bool
val secondary_alignment : t -> bool
val not_passing_quality_controls : t -> bool
val pcr_or_optical_duplicate : t -> bool
val supplementary_alignment : t -> bool
end
type cigar_op = private [< `Alignment_match of int
| `Deletion of int
| `Hard_clipping of int
| `Insertion of int
| `Padding of int
| `Seq_match of int
| `Seq_mismatch of int
| `Skipped of int
| `Soft_clipping of int ]
type optional_field_value = private [< `A of char
| `B of char * string list
| `H of string
| `Z of string
| `f of float
| `i of Core.Std.Int32.t ]
type rnext = private [< `Equal_to_RNAME | `Value of string ]
type alignment = private {
qname : string option;
(* QNAME *)
rname : string option;
(* RNAME *)
pos : int option;
(* POS *)
mapq : int option;
(* MAPQ *)
pnext : int option;
(* PNEXT *)
tlen : int option;
(* TLEN *)
seq : string option;
(* SEQ *)
optional_fields : optional_field list;
cigar and qual: an empty list indicates that no value, i.e. '*',
was given.
module MakeIO: functor (Future:Future.S) -> sig
val read : ?start:Biocaml_internal_utils.Pos.t ->
Future.Reader.t ->
(Biocaml_sam.header *
Biocaml_sam.alignment Core.Std.Or_error.t Future.Pipe.Reader.t)
Core.Std.Or_error.t Future.Deferred.t
val write : Future.Writer.t ->
?header:Biocaml_sam.header ->
Biocaml_sam.alignment Future.Pipe.Reader.t -> unit Future.Deferred.t
val write_file : ?perm:int ->
?append:bool ->
string ->
?header:Biocaml_sam.header ->
Biocaml_sam.alignment Future.Pipe.Reader.t -> unit Future.Deferred.t
end
include ??
val header_line : version:string ->
?sort_order:sort_order ->
unit -> header_line Core.Std.Or_error.t
val ref_seq : name:string ->
length:int ->
?assembly:string ->
?md5:string ->
?species:string ->
?uri:string -> unit -> ref_seq Core.Std.Or_error.t
val read_group : id:string ->
?seq_center:string ->
?description:string ->
?run_date:string ->
?flow_order:string ->
?key_seq:string ->
?library:string ->
?program:string ->
?predicted_median_insert_size:int ->
?platform:platform ->
?platform_unit:string ->
?sample:string -> unit -> read_group Core.Std.Or_error.t
The run_date string will be parsed as a Date.t or Time.t,
whichever is possible. If it is a time without a timezone, the local
timezone will be assumed.
val header : ?version:string ->
?sort_order:sort_order ->
?ref_seqs:ref_seq list ->
?read_groups:read_group list ->
?programs:program list ->
?comments:string list ->
?others:(string * tag_value list) list ->
unit -> header Core.Std.Or_error.t
val parse_header_item_tag : string -> header_item_tag Core.Std.Or_error.t
val parse_tag_value : string -> tag_value Core.Std.Or_error.t
val parse_header_version : string -> string Core.Std.Or_error.t
val parse_sort_order : string -> sort_order Core.Std.Or_error.t
val parse_header_line : tag_value list -> header_line Core.Std.Or_error.t
val parse_ref_seq : tag_value list -> ref_seq Core.Std.Or_error.t
val parse_platform : string -> platform Core.Std.Or_error.t
val parse_read_group : tag_value list -> read_group Core.Std.Or_error.t
val parse_program : tag_value list -> program Core.Std.Or_error.t
val parse_header_item : Biocaml_internal_utils.Line.t -> header_item Core.Std.Or_error.t
val parse_header : string -> header Core.Std.Or_error.t
val cigar_op_alignment_match : int -> cigar_op Core.Std.Or_error.t
val cigar_op_insertion : int -> cigar_op Core.Std.Or_error.t
val cigar_op_deletion : int -> cigar_op Core.Std.Or_error.t
val cigar_op_skipped : int -> cigar_op Core.Std.Or_error.t
val cigar_op_soft_clipping : int -> cigar_op Core.Std.Or_error.t
val cigar_op_hard_clipping : int -> cigar_op Core.Std.Or_error.t
val cigar_op_padding : int -> cigar_op Core.Std.Or_error.t
val cigar_op_seq_match : int -> cigar_op Core.Std.Or_error.t
val cigar_op_seq_mismatch : int -> cigar_op Core.Std.Or_error.t
val optional_field_value_A : char -> optional_field_value Core.Std.Or_error.t
val optional_field_value_i : Core.Std.Int32.t -> optional_field_value
val optional_field_value_f : float -> optional_field_value
val optional_field_value_Z : string -> optional_field_value Core.Std.Or_error.t
val optional_field_value_H : string -> optional_field_value Core.Std.Or_error.t
val optional_field_value_B : char -> string list -> optional_field_value Core.Std.Or_error.t
val optional_field : string ->
optional_field_value ->
optional_field Core.Std.Or_error.t
val parse_optional_field_value : string -> optional_field_value Core.Std.Or_error.t
val parse_optional_field : string -> optional_field Core.Std.Or_error.t
val alignment : ?ref_seqs:Core.Std.String.Set.t ->
?qname:string ->
flags:Flags.t ->
?rname:string ->
?pos:int ->
?mapq:int ->
?cigar:cigar_op list ->
?rnext:rnext ->
?pnext:int ->
?tlen:int ->
?seq:string ->
?qual:Biocaml_phred_score.t list ->
?optional_fields:optional_field list ->
unit -> alignment Core.Std.Or_error.t
val parse_qname : string -> string option Core.Std.Or_error.t
val parse_flags : string -> Flags.t Core.Std.Or_error.t
val parse_rname : string -> string option Core.Std.Or_error.t
val parse_pos : string -> int option Core.Std.Or_error.t
val parse_mapq : string -> int option Core.Std.Or_error.t
val parse_cigar : string -> cigar_op list Core.Std.Or_error.t
val parse_rnext : string -> rnext option Core.Std.Or_error.t
val parse_pnext : string -> int option Core.Std.Or_error.t
val parse_tlen : string -> int option Core.Std.Or_error.t
val parse_seq : string -> string option Core.Std.Or_error.t
val parse_qual : string -> Biocaml_phred_score.t list Core.Std.Or_error.t
val parse_alignment : ?ref_seqs:Core.Std.String.Set.t ->
Biocaml_internal_utils.Line.t -> alignment Core.Std.Or_error.t
val print_header_item_tag : header_item_tag -> string
val print_tag_value : tag_value -> string
val print_header_version : string -> string
val print_sort_order : sort_order -> string
val print_header_line : header_line -> string
val print_ref_seq : ref_seq -> string
val print_platform : platform -> string
val print_read_group : read_group -> string
val print_program : program -> string
val print_other : string * tag_value list -> string
val print_qname : string option -> string
val print_flags : Flags.t -> string
val print_rname : string option -> string
val print_pos : int option -> string
val print_mapq : int option -> string
val print_cigar_op : cigar_op -> string
val print_cigar : cigar_op list -> string
val print_rnext : rnext option -> string
val print_pnext : int option -> string
val print_tlen : int option -> string
val print_seq : string option -> string
val print_qual : Biocaml_phred_score.t list -> string
val print_optional_field : optional_field -> string
val print_alignment : alignment -> string
val header_item_tag_of_sexp : Sexplib.Sexp.t -> header_item_tag
val __header_item_tag_of_sexp__ : Sexplib.Sexp.t -> header_item_tag
val sexp_of_header_item_tag : header_item_tag -> Sexplib.Sexp.t
val tag_value_of_sexp : Sexplib.Sexp.t -> tag_value
val sexp_of_tag_value : tag_value -> Sexplib.Sexp.t
val sort_order_of_sexp : Sexplib.Sexp.t -> sort_order
val __sort_order_of_sexp__ : Sexplib.Sexp.t -> sort_order
val sexp_of_sort_order : sort_order -> Sexplib.Sexp.t
val header_line_of_sexp : Sexplib.Sexp.t -> header_line
val sexp_of_header_line : header_line -> Sexplib.Sexp.t
val ref_seq_of_sexp : Sexplib.Sexp.t -> ref_seq
val sexp_of_ref_seq : ref_seq -> Sexplib.Sexp.t
val platform_of_sexp : Sexplib.Sexp.t -> platform
val __platform_of_sexp__ : Sexplib.Sexp.t -> platform
val sexp_of_platform : platform -> Sexplib.Sexp.t
val read_group_of_sexp : Sexplib.Sexp.t -> read_group
val sexp_of_read_group : read_group -> Sexplib.Sexp.t
val program_of_sexp : Sexplib.Sexp.t -> program
val sexp_of_program : program -> Sexplib.Sexp.t
val header_item_of_sexp : Sexplib.Sexp.t -> header_item
val __header_item_of_sexp__ : Sexplib.Sexp.t -> header_item
val sexp_of_header_item : header_item -> Sexplib.Sexp.t
sort_order: Guaranteed to be None if version = None.
ref_seqs: List of @SQ items. Order matters; it dictates
alignment sorting order when sort_order = `Coordinate.
read_groups: Unordered list of @RG items.
programs: List of @PG lines. Currently unordered, but we should
topologically sort.
comments: Unordered list of @CO lines.
val cigar_op_of_sexp : Sexplib.Sexp.t -> cigar_op
val __cigar_op_of_sexp__ : Sexplib.Sexp.t -> cigar_op
val sexp_of_cigar_op : cigar_op -> Sexplib.Sexp.t
val optional_field_value_of_sexp : Sexplib.Sexp.t -> optional_field_value
val __optional_field_value_of_sexp__ : Sexplib.Sexp.t -> optional_field_value
val sexp_of_optional_field_value : optional_field_value -> Sexplib.Sexp.t
val optional_field_of_sexp : Sexplib.Sexp.t -> optional_field
val sexp_of_optional_field : optional_field -> Sexplib.Sexp.t
val rnext_of_sexp : Sexplib.Sexp.t -> rnext
val __rnext_of_sexp__ : Sexplib.Sexp.t -> rnext
val sexp_of_rnext : rnext -> Sexplib.Sexp.t
val alignment_of_sexp : Sexplib.Sexp.t -> alignment
cigar and qual: an empty list indicates that no value, i.e. '*',
was given.
val sexp_of_alignment : alignment -> Sexplib.Sexp.t
The run_date string will be parsed as a Date.t or Time.t,
whichever is possible. If it is a time without a timezone, the local
timezone will be assumed.
end