SeqAn3 3.3.0-rc.1
The Modern C++ library for sequence analysis.
misc_input.hpp
Go to the documentation of this file.
// -----------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
// -----------------------------------------------------------------------------------------------------

13#pragma once
14
#include <algorithm>
#include <array>
#include <cassert>
#include <concepts>
#include <filesystem>
#include <functional>
#include <iostream>
#include <iterator>
#include <memory>
#include <ranges>
#include <span>
#include <string>
#include <tuple>

#if defined(SEQAN3_HAS_BZIP2)
#    include <seqan3/contrib/stream/bz2_istream.hpp>
#endif
#if defined(SEQAN3_HAS_ZLIB)
#    include <seqan3/contrib/stream/bgzf_istream.hpp>
#    include <seqan3/contrib/stream/gz_istream.hpp>
#endif
36
37namespace seqan3::detail
38{
39
45template <std::ranges::forward_range ref_t, std::ranges::forward_range query_t>
46inline bool starts_with(ref_t && reference, query_t && query)
47 requires std::equality_comparable_with<std::ranges::range_reference_t<ref_t>,
48 std::ranges::range_reference_t<query_t>>
49{
50 auto rit = std::ranges::begin(reference);
51 auto rend = std::ranges::end(reference);
52
53 auto qit = std::ranges::begin(query);
54 auto qend = std::ranges::end(query);
55
56 while (true)
57 {
58 if (qit == qend)
59 return true;
60
61 if (rit == rend)
62 return false;
63
64 if (*qit != *rit)
65 return false;
66
67 ++qit;
68 ++rit;
69 }
70}
71
79template <builtin_character char_t>
80inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream, std::filesystem::path & filename)
82{
83 assert(primary_stream.good());
84
85 // don't assume ownership
86 constexpr auto stream_deleter_noop = [](std::basic_istream<char_t> *) {};
87 // assume ownership
88 [[maybe_unused]] constexpr auto stream_deleter_default = [](std::basic_istream<char_t> * ptr)
89 {
90 delete ptr;
91 };
92
93 // extract "magic header"
94 std::istreambuf_iterator<char_t> it{primary_stream};
95 std::array<char, bgzf_compression::magic_header.size()> magic_number{}; // Largest magic header from bgzf
96 size_t read_chars = 0;
97 for (; read_chars < magic_number.size(); ++read_chars)
98 {
100 break;
101
102 magic_number[read_chars] = *it;
103 ++it;
104 }
105
106 // unget all read chars.
107 for (size_t i = 0; i < read_chars; ++i)
108 primary_stream.unget();
109
110 std::string extension{};
111 if (filename.has_extension())
112 extension = filename.extension().string().substr(1);
113
114 // tests whether the given extension matches with one of the given compression tags.
115 [[maybe_unused]] auto contains_extension = [](auto compression_tag, auto const & extension) constexpr
116 {
117 return std::ranges::find(decltype(compression_tag)::file_extensions, extension)
118 != std::ranges::end(decltype(compression_tag)::file_extensions);
119 };
120
121 // set return value appropriately
122 if (read_chars == magic_number.size() && bgzf_compression::validate_header(std::span{magic_number})) // BGZF
123 {
124#if defined(SEQAN3_HAS_ZLIB)
125 if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
126 filename.replace_extension();
127
128 return {new contrib::basic_bgzf_istream<char_t>{primary_stream}, stream_deleter_default};
129#else
130 throw file_open_error{"Trying to read from a bgzf file, but no ZLIB available."};
131#endif
132 }
133 else if (starts_with(magic_number, gz_compression::magic_header)) // GZIP
134 {
135#if defined(SEQAN3_HAS_ZLIB)
136 if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
137 filename.replace_extension();
138
139 return {new contrib::basic_gz_istream<char_t>{primary_stream}, stream_deleter_default};
140#else
141 throw file_open_error{"Trying to read from a gzipped file, but no ZLIB available."};
142#endif
143 }
144 else if (starts_with(magic_number, bz2_compression::magic_header)) // BZip2
145 {
146#if defined(SEQAN3_HAS_BZIP2)
147 if (contains_extension(bz2_compression{}, extension))
148 filename.replace_extension();
149
150 return {new contrib::basic_bz2_istream<char_t>{primary_stream}, stream_deleter_default};
151#else
152 throw file_open_error{"Trying to read from a bzipped file, but no libbz2 available."};
153#endif
154 }
155 else if (starts_with(magic_number, zstd_compression::magic_header)) // ZStd
156 {
157 throw file_open_error{"Trying to read from a zst'ed file, but SeqAn does not yet support this."};
158 }
159
160 return {&primary_stream, stream_deleter_noop};
161}
162
164template <builtin_character char_t>
165inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream)
166{
168 return make_secondary_istream(primary_stream, p);
169}
170
171} // namespace seqan3::detail
T begin(T... args)
Provides seqan3::contrib::bgzf_thread_count.
Provides stream compression utilities.
T find(T... args)
Provides exceptions used in the I/O module.
Provides seqan3::detail::magic_header.
T rend(T... args)
T size(T... args)
Provides concepts that do not have equivalents in C++20.