SeqAn3 3.3.0-rc.1
The Modern C++ library for sequence analysis.
sam_file/input.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <cassert>
16#include <concepts>
17#include <filesystem>
18#include <fstream>
19#include <ranges>
20#include <string>
21#include <variant>
22#include <vector>
23
47
48namespace seqan3
49{
50
51// ---------------------------------------------------------------------------------------------------------------------
52// sam_file_input_traits
53// ---------------------------------------------------------------------------------------------------------------------
54
113template <typename t>
114concept sam_file_input_traits =
115 requires (t v) {
116 // field::seq
121
122 // field::id
124
125 // field::qual
128
129 // field::ref_seq
130 // either ref_info_not_given or a range over ranges over alphabet (e.g. std::vector<dna4_vector>)
131 requires std::same_as<typename t::ref_sequences, ref_info_not_given>
132 || requires () {
133 requires alphabet<std::ranges::range_reference_t<
134 std::ranges::range_reference_t<typename t::ref_sequences>>>;
135 };
136
137 // field::ref_id
139 && (!std::same_as<typename t::ref_sequences, ref_info_not_given>
141 std::ranges::range_reference_t<std::ranges::range_reference_t<typename t::ref_ids>>>);
142 requires std::ranges::forward_range<std::ranges::range_reference_t<typename t::ref_ids>>;
143 requires std::ranges::forward_range<typename t::ref_ids>;
144
145 // field::offset is fixed to int32_t
146 // field::ref_offset is fixed to std::optional<int32_t>
147 // field::flag is fixed to seqan3::sam_flag
148 // field::mapq is fixed to uint8_t
149 // field::evalue is fixed to double
150 // field::bitscore is fixed to double
151 // field::mate is fixed to std::tuple<ref_id_container<ref_id_alphabet>, ref_offset_type, int32_t>
152
153 // field::alignment
154 // the alignment type cannot be configured.
155 // Type of tuple entry 1 (reference) is set to
156 // 1) a std::ranges::subrange over std::ranges::range_value_t<typename t::ref_sequences> if reference information was given
157 // or 2) a "dummy" sequence type:
158 // views::repeat_n(sequence_alphabet{}, size_t{}) | std::views::transform(detail::access_restrictor_fn{})
159 // Type of tuple entry 2 (query) is set to
160 // 1) a std::ranges::subrange over std::ranges::range_value_t<typename t::ref_sequences> if reference information was given
161 // or 2) a "dummy" sequence type:
162 };
164
165// ---------------------------------------------------------------------------------------------------------------------
166// sam_file_input_default_traits
167// ---------------------------------------------------------------------------------------------------------------------
168
184template <typename ref_sequences_t = ref_info_not_given, typename ref_ids_t = std::deque<std::string>>
186{
194
197
199 template <typename _sequence_alphabet>
201
203 template <typename _id_alphabet>
205
208
210 template <typename _quality_alphabet>
212
214 using ref_sequences = ref_sequences_t;
215
217 using ref_ids = ref_ids_t;
219};
220
221// ---------------------------------------------------------------------------------------------------------------------
222// sam_file_input
223// ---------------------------------------------------------------------------------------------------------------------
224
240template <sam_file_input_traits traits_type_ = sam_file_input_default_traits<>,
241 detail::fields_specialisation selected_field_ids_ = fields<field::seq,
242 field::id,
243 field::offset,
244 field::ref_id,
245 field::ref_offset,
246 field::alignment,
247 field::cigar,
248 field::mapq,
249 field::qual,
250 field::flag,
251 field::mate,
252 field::tags,
253 field::header_ptr>,
254 detail::type_list_of_sam_file_input_formats valid_formats_ = type_list<format_sam, format_bam>>
256{
257public:
263 using traits_type = traits_type_;
265 using selected_field_ids = selected_field_ids_;
267 using valid_formats = valid_formats_;
269 using stream_char_type = char;
271
272private:
274 using dummy_ref_type = decltype(views::repeat_n(typename traits_type::sequence_alphabet{}, size_t{})
275 | std::views::transform(detail::access_restrictor_fn{}));
276
278 using ref_sequence_unsliced_type = detail::lazy_conditional_t<
279 std::ranges::range<typename traits_type::ref_sequences const>,
282
284 using ref_sequence_sliced_type = decltype(std::declval<ref_sequence_unsliced_type>() | views::slice(0, 0));
285
286public:
295 using id_type = typename traits_type::template id_container<char>;
297 using offset_type = int32_t;
323 using mapq_type = uint8_t;
325 using quality_type = typename traits_type::template quality_container<typename traits_type::quality_alphabet>;
334
335private:
341
342public:
345
348 id_type,
354 mapq_type,
356 flag_type,
357 mate_type,
359 header_type *>;
360
383 field::id,
395
// Compile-time validation of the selected fields: the immediately-invoked constexpr lambda walks
// selected_field_ids::as_array and yields false as soon as one field is not contained in field_ids,
// in which case compilation stops with the message below.
396 static_assert(
397 []() constexpr {
398 for (field f : selected_field_ids::as_array)
399 if (!field_ids::contains(f))
400 return false;
401 return true;
402 }(),
403 "You selected a field that is not valid for alignment files, please refer to the documentation "
404 "of sam_file_input::field_ids for the accepted values.");
405
410
420 using const_reference = void;
422 using size_type = size_t;
428 using const_iterator = void;
430 using sentinel = std::default_sentinel_t;
432
437 sam_file_input() = delete;
447 ~sam_file_input() = default;
448
467 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
469 {
470 init_by_filename(std::move(filename));
471 }
472
492 template <input_stream stream_t, sam_file_input_format file_format>
493 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
494 sam_file_input(stream_t & stream,
495 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
496 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
498 {
499 init_by_format<file_format>();
500 }
501
// Construct from a temporary (rvalue) stream with an explicitly specified format.
// The stream is moved into a heap-allocated stream_t that primary_stream takes ownership of;
// stream_deleter_default deletes it again. The format and field tags are only used for
// type deduction (their parameter names exist for the documentation only).
// Only participates if the stream's char_type equals stream_char_type.
503 template <input_stream stream_t, sam_file_input_format file_format>
504 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
505 sam_file_input(stream_t && stream,
506 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
507 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
508 primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
509 {
510 init_by_format<file_format>();
511 }
512
537 typename traits_type::ref_ids & ref_ids,
538 typename traits_type::ref_sequences & ref_sequences,
539 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
541 {
542 // initialize reference information
543 set_references(ref_ids, ref_sequences);
544
545 init_by_filename(std::move(filename));
546 }
547
573 template <input_stream stream_t, sam_file_input_format file_format>
574 sam_file_input(stream_t & stream,
575 typename traits_type::ref_ids & ref_ids,
576 typename traits_type::ref_sequences & ref_sequences,
577 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
578 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
580 {
581 // initialize reference information
582 set_references(ref_ids, ref_sequences);
583
584 init_by_format<file_format>();
585 }
586
// Construct from a temporary (rvalue) stream with an explicitly specified format and additional
// reference information.
// The stream is moved into a heap-allocated stream_t owned by primary_stream (released through
// stream_deleter_default). ref_ids and ref_sequences are taken by lvalue reference:
// set_references() stores the address of ref_sequences, so the caller's object must outlive
// this file object.
588 template <input_stream stream_t, sam_file_input_format file_format>
589 sam_file_input(stream_t && stream,
590 typename traits_type::ref_ids & ref_ids,
591 typename traits_type::ref_sequences & ref_sequences,
592 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
593 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
594 primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
595 {
596 // initialize reference information
597 set_references(ref_ids, ref_sequences);
598
599 init_by_format<file_format>();
600 }
601
603 // explicitly delete rvalues for reference information
605 typename traits_type::ref_ids &&,
606 typename traits_type::ref_sequences &&,
607 selected_field_ids const &) = delete;
608
// Deleted overload: constructing from a stream while passing the reference ids and reference
// sequences as rvalues is rejected at compile time, so temporaries cannot be used as
// reference information.
609 template <input_stream stream_t, sam_file_input_format file_format>
610 sam_file_input(stream_t &&,
611 typename traits_type::ref_ids &&,
612 typename traits_type::ref_sequences &&,
613 file_format const &,
614 selected_field_ids const &) = delete;
617
639 {
640 // buffer first record
642 {
645 }
646
647 return {*this};
648 }
649
// Returns a sentinel for comparison with an iterator of this file.
// The sentinel type is std::default_sentinel_t; a value-initialised instance is returned.
663 sentinel end() noexcept
664 {
665 return {};
666 }
667
// Returns the record the file is currently positioned at, obtained by dereferencing begin().
// NOTE(review): this function is noexcept, while begin() buffers the first record and is not
// declared noexcept in this listing — confirm that reading a record cannot throw here.
691 reference front() noexcept
692 {
693 return *begin();
694 }
696
699
713 {
714 // make sure header is read
716 {
719 }
720
721 return *header_ptr;
722 }
723
724protected:
726
729 {
730 primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
732 ->open(filename, std::ios_base::in | std::ios::binary);
733 // open stream
734 if (!primary_stream->good())
735 throw file_open_error{"Could not open file " + filename.string() + " for reading."};
736
738 detail::set_format(format, filename);
739 }
740
742 template <typename format_type>
744 {
745 static_assert(list_traits::contains<format_type, valid_formats>,
746 "You selected a format that is not in the valid_formats of this file.");
747
750 }
751
754
765
774 {}
777 {
778 delete ptr;
779 }
780
785
789 bool at_end{false};
790
793
797
802 typename traits_type::ref_sequences const * reference_sequences_ptr{nullptr};
803
814 template <std::ranges::forward_range ref_sequences_t>
815 void set_references(typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
816 {
817 assert(std::ranges::distance(ref_ids) == std::ranges::distance(ref_sequences));
818
819 header_ptr = std::unique_ptr<header_type>{std::make_unique<header_type>(ref_ids)};
820 reference_sequences_ptr = &ref_sequences;
821
822 // initialise reference map and ref_dict if ref_ids are non-empty
823 for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx)
824 {
825 header_ptr->ref_id_info.emplace_back(std::ranges::distance(ref_sequences[idx]), "");
826
827 if constexpr (std::ranges::contiguous_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>
828 && std::ranges::sized_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>
829 && std::ranges::borrowed_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>)
830 {
831 auto && id = header_ptr->ref_ids()[idx];
832 header_ptr->ref_dict[std::span{std::ranges::data(id), std::ranges::size(id)}] = idx;
833 }
834 else
835 {
836 header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
837 }
838 }
839 }
841
844 {
845 // clear the record
847 detail::get_or_ignore<field::header_ptr>(record_buffer) = header_ptr.get();
848
849 // at end if we could not read further
852 {
853 at_end = true;
854 return;
855 }
856
857 auto call_read_func = [this](auto & ref_seq_info)
858 {
860 [&](auto & f)
861 {
862 f.read_alignment_record(*secondary_stream,
863 options,
864 ref_seq_info,
865 *header_ptr,
867 detail::get_or_ignore<field::seq>(record_buffer),
868 detail::get_or_ignore<field::qual>(record_buffer),
869 detail::get_or_ignore<field::id>(record_buffer),
870 detail::get_or_ignore<field::offset>(record_buffer),
871 detail::get_or_ignore<field::ref_seq>(record_buffer),
872 detail::get_or_ignore<field::ref_id>(record_buffer),
873 detail::get_or_ignore<field::ref_offset>(record_buffer),
874 detail::get_or_ignore<field::alignment>(record_buffer),
875 detail::get_or_ignore<field::cigar>(record_buffer),
876 detail::get_or_ignore<field::flag>(record_buffer),
877 detail::get_or_ignore<field::mapq>(record_buffer),
878 detail::get_or_ignore<field::mate>(record_buffer),
879 detail::get_or_ignore<field::tags>(record_buffer),
880 detail::get_or_ignore<field::evalue>(record_buffer),
881 detail::get_or_ignore<field::bit_score>(record_buffer));
882 },
883 format);
884 };
885
886 assert(!format.valueless_by_exception());
887
888 if constexpr (!std::same_as<typename traits_type::ref_sequences, ref_info_not_given>)
889 call_read_func(*reference_sequences_ptr);
890 else
891 call_read_func(std::ignore);
892 }
893
895 friend iterator;
896};
897
903template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_ids>
904sam_file_input(stream_type && stream, file_format const &, selected_field_ids const &)
905 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
908
910template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_ids>
911sam_file_input(stream_type & stream, file_format const &, selected_field_ids const &)
912 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
915
917template <input_stream stream_type, sam_file_input_format file_format>
918sam_file_input(stream_type && stream, file_format const &)
919 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
920 typename sam_file_input<>::selected_field_ids, // actually use the default
922
924template <input_stream stream_type, sam_file_input_format file_format>
925sam_file_input(stream_type & stream, file_format const &)
926 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
927 typename sam_file_input<>::selected_field_ids, // actually use the default
929
931template <std::ranges::forward_range ref_ids_t,
932 std::ranges::forward_range ref_sequences_t,
934sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &, selected_field_ids const &)
938 typename sam_file_input<>::valid_formats>; // actually use the default
939
941template <std::ranges::forward_range ref_ids_t, std::ranges::forward_range ref_sequences_t>
942sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &) -> sam_file_input<
944 typename sam_file_input<>::selected_field_ids, // actually use the default
945 typename sam_file_input<>::valid_formats>; // actually use the default
946
948template <input_stream stream_type,
949 std::ranges::forward_range ref_ids_t,
950 std::ranges::forward_range ref_sequences_t,
951 sam_file_input_format file_format,
953sam_file_input(stream_type && stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &)
958
960template <input_stream stream_type,
961 std::ranges::forward_range ref_ids_t,
962 std::ranges::forward_range ref_sequences_t,
963 sam_file_input_format file_format,
965sam_file_input(stream_type & stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &)
970
972template <input_stream stream_type,
973 std::ranges::forward_range ref_ids_t,
974 std::ranges::forward_range ref_sequences_t,
975 sam_file_input_format file_format>
976sam_file_input(stream_type && stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input<
978 typename sam_file_input<>::selected_field_ids, // actually use the default
980
982template <input_stream stream_type,
983 std::ranges::forward_range ref_ids_t,
984 std::ranges::forward_range ref_sequences_t,
985 sam_file_input_format file_format>
986sam_file_input(stream_type & stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input<
988 typename sam_file_input<>::selected_field_ids, // actually use the default
991
992} // namespace seqan3
Provides seqan3::aa27, container aliases and string literals.
Provides the seqan3::cigar alphabet.
Provides alphabet adaptations for standard char types.
A combined alphabet that can hold values of either of its alternatives.
Definition: alphabet_variant.hpp:120
Input iterator necessary for providing a range-like interface in input file.
Definition: in_file_iterator.hpp:41
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition: dna15.hpp:51
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition: dna5.hpp:51
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition: gap_decorator.hpp:81
Quality type for traditional Sanger and modern Illumina Phred scores.
Definition: phred42.hpp:47
Stores the header information of alignment files.
Definition: header.hpp:34
The generic concept for alignment file input formats.
Definition: sam_file/input_format_concept.hpp:154
A class for reading alignment files, e.g. SAM, BAM, BLAST ...
Definition: sam_file/input.hpp:256
sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, typename sam_file_input<>::selected_field_ids, typename sam_file_input<>::valid_formats >
Deduce ref_sequences_t and ref_ids_t, default the rest.
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: sam_file/input.hpp:663
size_t size_type
An unsigned integer type, usually std::size_t.
Definition: sam_file/input.hpp:422
std::optional< int32_t > ref_id_type
The type of field::ref_id is fixed to std::optional<int32_t>.
Definition: sam_file/input.hpp:314
void set_references(typename traits_type::ref_ids &ref_ids, ref_sequences_t &&ref_sequences)
Updates the reference information members and the header.
Definition: sam_file/input.hpp:815
void const_reference
The const_reference type is void because files are not const-iterable.
Definition: sam_file/input.hpp:420
sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, selected_field_ids, typename sam_file_input<>::valid_formats >
Deduce selected fields, ref_sequences_t and ref_ids_t, default the rest.
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: sam_file/input.hpp:267
decltype(views::repeat_n(typename traits_type::sequence_alphabet{}, size_t{})|std::views::transform(detail::access_restrictor_fn{})) dummy_ref_type
The dummy ref sequence type if no reference information was given.
Definition: sam_file/input.hpp:275
char stream_char_type
Character type of the stream(s).
Definition: sam_file/input.hpp:269
sam_file_input(stream_type &&stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce ref_sequences_t and ref_ids_t, and file format.
sam_file_input(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: sam_file/input.hpp:466
bool at_end
File is one position behind the last record.
Definition: sam_file/input.hpp:789
sam_file_input(stream_type &stream, file_format const &) -> sam_file_input< typename sam_file_input<>::traits_type, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce file_format, and default the rest.
std::default_sentinel_t sentinel
The type returned by end().
Definition: sam_file/input.hpp:430
void read_next_record()
Tell the format to move to the next record and update the buffer.
Definition: sam_file/input.hpp:843
sam_file_input(stream_t &stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: sam_file/input.hpp:574
std::vector< char > stream_buffer
A larger (compared to stl default) stream buffer to use when reading from a file.
Definition: sam_file/input.hpp:761
typename traits_type::template sequence_container< typename traits_type::sequence_alphabet > sequence_type
The type of field::seq (default std::vector<seqan3::dna5>).
Definition: sam_file/input.hpp:293
stream_ptr_t primary_stream
The primary stream is the user provided stream or the file stream if constructed from filename.
Definition: sam_file/input.hpp:782
format_type format
The actual std::variant holding a pointer to the detected/selected format.
Definition: sam_file/input.hpp:795
std::optional< int32_t > ref_offset_type
The type of field::ref_offset is fixed to a std::optional<int32_t>.
Definition: sam_file/input.hpp:321
traits_type_ traits_type
A traits type that defines aliases and template for storage of the fields.
Definition: sam_file/input.hpp:263
int32_t offset_type
The type of field::offset is fixed to int32_t.
Definition: sam_file/input.hpp:297
sam_file_input(stream_type &stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce ref_sequences_t and ref_ids_t, and file format.
sam_file_input_options< typename traits_type::sequence_legal_alphabet > options
The options are public and its members can be set directly.
Definition: sam_file/input.hpp:698
sam_file_input(stream_type &&stream, file_format const &) -> sam_file_input< typename sam_file_input<>::traits_type, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce file_format, and default the rest.
sam_file_input(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: sam_file/input.hpp:494
static void stream_deleter_noop(std::basic_istream< stream_char_type > *)
Stream deleter that does nothing (no ownership assumed).
Definition: sam_file/input.hpp:773
detail::lazy_conditional_t< std::ranges::range< typename traits_type::ref_sequences const >, detail::lazy< std::ranges::range_reference_t, typename traits_type::ref_sequences const >, dummy_ref_type > ref_sequence_unsliced_type
The unsliced ref sequence type if reference information was given.
Definition: sam_file/input.hpp:281
typename traits_type::template id_container< char > id_type
The type of field::id (std::string by default).
Definition: sam_file/input.hpp:295
sam_file_input & operator=(sam_file_input &&)=default
Move assignment is defaulted.
friend iterator
Befriend iterator so it can access the buffers.
Definition: sam_file/input.hpp:895
stream_ptr_t secondary_stream
The secondary stream is a compression layer on the primary or just points to the primary (no compress...
Definition: sam_file/input.hpp:784
sam_file_input(stream_t &&stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: sam_file/input.hpp:589
std::tuple< gap_decorator< ref_sequence_type >, alignment_query_type > alignment_type
The type of field::alignment (default: std::pair<std::vector<gapped<dna5>>, std::vector<gapped<dna5>>...
Definition: sam_file/input.hpp:344
sam_record< detail::select_types_with_ids_t< field_types, field_ids, selected_field_ids >, selected_field_ids > record_type
The type of the record, a specialisation of seqan3::record; acts as a tuple of the selected field typ...
Definition: sam_file/input.hpp:408
typename detail::variant_from_tags< valid_formats, detail::sam_file_input_format_exposer >::type format_type
Type of the format, a std::variant over the valid_formats.
Definition: sam_file/input.hpp:792
sam_file_input()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
iterator begin()
Returns an iterator to current position in the file.
Definition: sam_file/input.hpp:638
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: sam_file/input.hpp:265
sam_file_input(stream_type &stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, selected_field_ids, type_list< file_format > >
Deduce selected fields, ref_sequences_t and ref_ids_t, and file format.
bool first_record_was_read
Tracks whether the very first record is buffered when calling begin().
Definition: sam_file/input.hpp:787
sam_file_input(std::filesystem::path filename, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename and given additional reference information.
Definition: sam_file/input.hpp:536
sam_file_input & operator=(sam_file_input const &)=delete
Copy assignment is explicitly deleted because you cannot have multiple access to the same file.
record_type record_buffer
Buffer for a single record.
Definition: sam_file/input.hpp:759
sam_file_input(sam_file_input &&)=default
Move construction is defaulted.
void init_by_format()
Initialisation based on a format (construction via stream).
Definition: sam_file/input.hpp:743
void const_iterator
The const iterator type is void because files are not const-iterable.
Definition: sam_file/input.hpp:428
std::streampos position_buffer
Buffer for the previous record position.
Definition: sam_file/input.hpp:763
std::unique_ptr< header_type > header_ptr
The file header object.
Definition: sam_file/input.hpp:753
header_type & header()
Access the file's header.
Definition: sam_file/input.hpp:712
sam_file_input(sam_file_input const &)=delete
Copy construction is explicitly deleted because you cannot have multiple access to the same file.
uint8_t mapq_type
The type of field::mapq is fixed to uint8_t.
Definition: sam_file/input.hpp:323
sam_flag flag_type
The type of field::flag is fixed to seqan3::sam_flag.
Definition: sam_file/input.hpp:327
decltype(std::declval< ref_sequence_unsliced_type >()|views::slice(0, 0)) ref_sequence_sliced_type
The ref sequence type if reference information was given.
Definition: sam_file/input.hpp:284
sam_file_input(stream_type &&stream, file_format const &, selected_field_ids const &) -> sam_file_input< typename sam_file_input<>::traits_type, selected_field_ids, type_list< file_format > >
Deduce selected fields, file_format, and default the rest.
sam_file_input(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: sam_file/input.hpp:505
sam_file_input(stream_type &&stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, selected_field_ids, type_list< file_format > >
Deduce selected fields, ref_sequences_t and ref_ids_t, and file format.
static void stream_deleter_default(std::basic_istream< stream_char_type > *ptr)
Stream deleter with default behaviour (ownership assumed).
Definition: sam_file/input.hpp:776
~sam_file_input()=default
Destructor is defaulted.
std::tuple< ref_id_type, ref_offset_type, int32_t > mate_type
The type of field::mate is fixed to std::tuple<ref_id_type, ref_offset_type, int32_t>.
Definition: sam_file/input.hpp:331
void init_by_filename(std::filesystem::path filename)
Definition: sam_file/input.hpp:728
reference front() noexcept
Return the record we are currently at in the file.
Definition: sam_file/input.hpp:691
typename traits_type::template quality_container< typename traits_type::quality_alphabet > quality_type
The type of field::qual (default std::vector<seqan3::phred42>).
Definition: sam_file/input.hpp:325
traits_type::ref_sequences const * reference_sequences_ptr
A pointer to the reference sequence information if given on construction.
Definition: sam_file/input.hpp:802
sam_file_input(stream_type &stream, file_format const &, selected_field_ids const &) -> sam_file_input< typename sam_file_input<>::traits_type, selected_field_ids, type_list< file_format > >
Deduce selected fields, file_format, and default the rest.
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:343
Auxiliary concept that checks whether a type is a specialisation of seqan3::fields.
Definition: detail/record.hpp:35
T data(T... args)
Provides auxiliary data structures and functions for seqan3::record and seqan3::fields.
Provides seqan3::dna15, container aliases and string literals.
Provides seqan3::dna5, container aliases and string literals.
Provides the seqan3::format_bam.
Provides the seqan3::format_sam.
Provides seqan3::gap_decorator.
T get(T... args)
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:76
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
void set_format(format_variant_type &format, std::filesystem::path const &file_name)
Sets the file format according to the file name extension.
Definition: io/detail/misc.hpp:68
auto make_secondary_istream(std::basic_istream< char_t > &primary_stream, std::filesystem::path &filename) -> std::unique_ptr< std::basic_istream< char_t >, std::function< void(std::basic_istream< char_t > *)> >
Depending on the magic bytes of the given stream, return a decompression stream or forward the primar...
Definition: misc_input.hpp:80
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ cigar
The cigar vector (std::vector<seqan3::cigar>) representing the alignment in SAM/BAM format.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ header_ptr
A pointer to the seqan3::sam_file_header object storing header information.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ id
The identifier, usually a string.
@ tags
The optional tags in the SAM format, stored in a dictionary.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr bool contains
Whether a type occurs in a type list or not.
Definition: type_list/traits.hpp:252
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: type_list/traits.hpp:470
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:178
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:91
Provides the seqan3::detail::in_file_iterator class template.
The generic alphabet concept that covers most data types used in ranges.
Checks whether from can be explicitly converted to to.
The requirements a traits_type for seqan3::sam_file_input must meet.
A more refined container concept than seqan3::container.
Refines seqan3::alphabet and adds assignability.
A concept that indicates whether a writable alphabet represents quality scores.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for input.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides seqan3::phred42 quality scores.
Provides quality alphabet composites.
Provides seqan3::views::repeat_n.
Provides seqan3::sam_file_input_format and auxiliary classes.
Provides seqan3::sam_record.
Provides helper data structures for the seqan3::sam_file_output.
T size(T... args)
Provides seqan3::views::slice.
An empty type whose only purpose is to hold an uninstantiated template plus its arguments.
Definition: lazy_conditional.hpp:33
Internal class used to expose the actual format interface to read alignment records from the file.
Definition: sam_file/input_format_concept.hpp:47
Base class to deduce the std::variant type from format tags.
Definition: io/detail/misc.hpp:31
A class template that holds a choice of seqan3::field.
Definition: record.hpp:128
static constexpr bool contains(field f)
Whether a field is contained in the parameter pack.
Definition: record.hpp:149
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem.
Definition: io/exception.hpp:39
void clear() noexcept(noexcept(std::apply(expander, std::declval< record & >())))
Clears containers that provide .clear() and (re-)initialises all other elements with = {}.
Definition: record.hpp:237
The default traits for seqan3::sam_file_input.
Definition: sam_file/input.hpp:186
ref_ids_t ref_ids
The type of the reference identifiers is deduced on construction.
Definition: sam_file/input.hpp:217
ref_sequences_t ref_sequences
The type of the reference sequences is deduced on construction.
Definition: sam_file/input.hpp:214
Type that contains multiple types.
Definition: type_list.hpp:29
Provides seqan3::detail::transformation_trait_or.
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.
T visit(T... args)