(** Parsing of VCF files.

    This module implements VCFv4.1, as defined by the 1000 Genomes Project:
    http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41 *)
module Biocaml_vcf : sig
  (** Types describing VCF meta-information and data rows, the errors a
      parser can report, and a [Transform] sub-module exposing a
      string-to-item transform. *)

  (** Identifier used as a key for INFO/FILTER/FORMAT style entries. *)
  type vcf_id = string

  (** Free-form description text attached to a meta entry. *)
  type vcf_description = string

  (** Cardinality of the values attached to an INFO or FORMAT entry.
      NOTE(review): the constructor names suggest the VCF [Number=] values
      (an integer, [A], [G] and [.]) -- confirm against the parser. *)
  type vcf_number =
    | Number of int
    | OnePerAllele
    | OnePerGenotype
    | Unknown

  (** Value types a FORMAT entry may declare. *)
  type vcf_format_type =
    [ `character_value
    | `float_value
    | `integer_value
    | `string_value ]

  (** Value types an INFO entry may declare; same as {!vcf_format_type}
      plus [`flag_value]. *)
  type vcf_info_type =
    [ `character_value
    | `flag_value
    | `float_value
    | `integer_value
    | `string_value ]

  (** Declaration of an INFO entry: cardinality, value type, description. *)
  type vcf_info_meta =
    | Info of vcf_number * vcf_info_type * vcf_description

  (** Declaration of a FILTER entry: its description. *)
  type vcf_filter_meta =
    | Filter of vcf_description

  (** Declaration of a FORMAT entry: cardinality, value type, description. *)
  type vcf_format_meta =
    | Format of vcf_number * vcf_format_type * vcf_description

  (** Declaration of an ALT entry: its description. *)
  type vcf_alt_meta =
    | Alt of vcf_description

  (** Meta-information collected for a VCF file.
      NOTE(review): the per-field roles below are inferred from the field
      names and types only -- confirm against the implementation. *)
  type vcf_meta = {
    vcfm_version : string;  (** Version string. *)
    vcfm_id_cache : vcf_id Core.Std.Set.Poly.t;
        (** Set of identifiers; presumably those already seen -- TODO
            confirm. *)
    vcfm_info : (vcf_id, vcf_info_meta) Core.Std.Hashtbl.t;
        (** INFO declarations keyed by identifier. *)
    vcfm_filters : (vcf_id * vcf_filter_meta) list;
        (** FILTER declarations as an association list (unlike the other
            declarations, which are hash tables). *)
    vcfm_format : (vcf_id, vcf_format_meta) Core.Std.Hashtbl.t;
        (** FORMAT declarations keyed by identifier. *)
    vcfm_alt : (string, vcf_alt_meta) Core.Std.Hashtbl.t;
        (** ALT declarations keyed by string. *)
    vcfm_arbitrary : (string, string) Core.Std.Hashtbl.t;
        (** Remaining key/value meta entries; presumably those not matching
            a structured kind -- TODO confirm. *)
    vcfm_header : string list;  (** Header entries. *)
    vcfm_samples : string list;  (** Sample names. *)
  }

  (** A single value in a sample's FORMAT data. *)
  type vcf_format =
    [ `character of char
    | `float of float
    | `integer of int
    | `missing
    | `string of string ]

  (** A single INFO value; same as {!vcf_format} plus [`flag]. *)
  type vcf_info =
    [ `character of char
    | `flag of string
    | `float of float
    | `integer of int
    | `missing
    | `string of string ]

  (** One data row of a VCF file.
      NOTE(review): no field carries {!vcf_info} values even though that
      type is declared above -- confirm whether an INFO field is missing
      from this listing. *)
  type vcf_row = {
    vcfr_chrom : string;
    vcfr_pos : int;
    vcfr_ids : string list;
    vcfr_ref : string;
    vcfr_alts : string list;
    vcfr_qual : float option;  (** [None] when no quality is available. *)
    vcfr_filter : vcf_id list;
    vcfr_samples :
      (vcf_id, (vcf_id * vcf_format list) list) Core.Std.Hashtbl.t;
        (** Table from an identifier to a list of
            [(identifier, values)] pairs; presumably sample name to
            per-FORMAT-key values -- TODO confirm. *)
  }

  (** The item type produced by the transform: a parsed row. *)
  type item = vcf_row

  (** Errors that can be reported for a single row. The payloads carry the
      offending identifier, text or lengths; their exact meaning is not
      visible here. The spelling [coersion] is part of the public tags and
      is kept as is. *)
  type vcf_parse_row_error =
    [ `duplicate_ids of vcf_id list
    | `format_type_coersion_failure of vcf_format_type * string
    | `info_type_coersion_failure of vcf_info_type * string
    | `invalid_arguments_length of vcf_id * int * int
    | `invalid_dna of string
    | `invalid_float of string
    | `invalid_int of string
    | `invalid_row_length of int * int
    | `malformed_sample of string
    | `unknown_alt of string
    | `unknown_filter of vcf_id
    | `unknown_format of vcf_id
    | `unknown_info of vcf_id ]

  (** Errors reported by the parser. Every case except [`not_ready]
      carries a {!Biocaml_internal_utils.Pos.t} position. *)
  type vcf_parse_error =
    [ `incomplete_input of
      Biocaml_internal_utils.Pos.t * string list * string option
    | `malformed_header of Biocaml_internal_utils.Pos.t * string
    | `malformed_meta of Biocaml_internal_utils.Pos.t * string
    | `malformed_row of
      Biocaml_internal_utils.Pos.t * vcf_parse_row_error * string
    | `not_ready ]

  (** [parse_error_to_string e] renders [e] as a string. *)
  val parse_error_to_string : vcf_parse_error -> string

  module Transform : sig
    (** [string_to_item ?filename ()] is a transform consuming strings and
        producing, for each result, either an {!item} or a
        {!vcf_parse_error}.

        The result types are written unqualified because a signature
        cannot refer to itself through its own module path; they are the
        [item] and [vcf_parse_error] declared above.

        @param filename optional; presumably used when reporting
        positions in errors -- TODO confirm. *)
    val string_to_item :
      ?filename:string ->
      unit ->
      ( string,
        (item, vcf_parse_error) Biocaml_internal_utils.Result.t )
      Biocaml_transform.t
  end
end