SeqAn3 3.1.0-rc.2
The Modern C++ library for sequence analysis.
output.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <cassert>
16#include <seqan3/std/filesystem>
17#include <fstream>
18#include <seqan3/std/ranges>
19#include <string>
20#include <variant>
21#include <vector>
22
30#include <seqan3/io/record.hpp>
44
45namespace seqan3
46{
47
48// ----------------------------------------------------------------------------
49// sequence_file_output
50// ----------------------------------------------------------------------------
51
164template <detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::qual>,
165 detail::type_list_of_sequence_file_output_formats valid_formats_ =
166 type_list<format_embl, format_fasta, format_fastq, format_genbank, format_sam>>
168{
169public:
// The seqan3::fields list with the fields selected for the record
// (re-exports the template parameter selected_field_ids_).
175 using selected_field_ids = selected_field_ids_;
// The seqan3::type_list with the possible formats
// (re-exports the template parameter valid_formats_).
177 using valid_formats = valid_formats_;
// Character type of the stream(s); the stream constructors require the
// user-supplied stream's char_type to be exactly this type.
179 using stream_char_type = char;
181
184
// Compile-time validation of the selected fields: an immediately invoked
// constexpr lambda walks selected_field_ids::as_array and yields false as soon
// as one entry is not contained in field_ids, which fails the build with the
// message below.
// NOTE(review): the declaration of field_ids is not part of the visible listing.
185 static_assert([] () constexpr
186 {
187 for (field f : selected_field_ids::as_array)
188 if (!field_ids::contains(f))
189 return false;
190 return true;
191 }(),
192 "You selected a field that is not valid for sequence files, please refer to the documentation "
193 "of sequence_file_output::field_ids for the accepted values.");
194
// Range-interface associated types. All element-related types are void for
// this output file.
// The value type (void).
201 using value_type = void;
// The reference type (void).
203 using reference = void;
// The const reference type (void).
205 using const_reference = void;
// The size type (void).
207 using size_type = void;
// The const iterator type is void, because files are not const-iterable.
213 using const_iterator = void;
// The type returned by end().
215 using sentinel = std::default_sentinel_t;
217
233
250 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
252 {
253 primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
254 static_cast<std::basic_ofstream<char> *>(primary_stream.get())->open(filename,
255 std::ios_base::out | std::ios::binary);
256
257 if (!primary_stream->good())
258 throw file_open_error{"Could not open file " + filename.string() + " for writing."};
259
260 // possibly add intermediate compression stream
262
263 // initialise format handler or throw if format is not found
264 detail::set_format(format, filename);
265 }
266
// Construct from an existing stream (taken by lvalue reference) and with a
// specified format.
//
// stream     - the stream to write to; its char_type must be the same type as
//              stream_char_type (enforced by the requires clause).
// format_tag - tag object whose type selects the file format.
// fields_tag - tag object for the selected field ids; defaults to
//              selected_field_ids{}.
//
// NOTE(review): both tag parameter names are wrapped in SEQAN3_DOXYGEN_ONLY,
// presumably so they are only named for the documentation build - confirm
// against the macro definition.
// NOTE(review): listing lines 290-291 are absent from this view, so only the
// format member is visible in the initializer list - check the original header
// for further initializers.
282 template <output_stream stream_t,
283 sequence_file_output_format file_format>
285 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
287 sequence_file_output(stream_t & stream,
288 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
289 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
// The format variant is set to the exposer alternative for the requested file_format.
292 format{detail::sequence_file_output_format_exposer<file_format>{}}
293 {
// Reject, at compile time, a format tag that is not listed in valid_formats.
294 static_assert(list_traits::contains<file_format, valid_formats>,
295 "You selected a format that is not in the valid_formats of this file.");
296 }
297
// Overload of the stream constructor for a stream passed as `stream_t &&`.
// The stream is move-constructed into a newly allocated stream_t that is stored
// in primary_stream together with stream_deleter_default (the deleter that
// assumes ownership).
//
// stream     - the stream to take over; its char_type must be the same type as
//              stream_char_type (enforced by the requires clause).
// format_tag - tag object whose type selects the file format.
// fields_tag - tag object for the selected field ids; defaults to
//              selected_field_ids{}.
//
// NOTE(review): listing line 308 is absent from this view - check the original
// header for a further initializer between primary_stream and format.
299 template <output_stream stream_t,
300 sequence_file_output_format file_format>
302 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
304 sequence_file_output(stream_t && stream,
305 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
306 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
307 primary_stream{new stream_t{std::move(stream)}, stream_deleter_default},
// The format variant is set to the exposer alternative for the requested file_format.
309 format{detail::sequence_file_output_format_exposer<file_format>{}}
310 {
// Reject, at compile time, a format tag that is not listed in valid_formats.
311 static_assert(list_traits::contains<file_format, valid_formats>,
312 "You selected a format that is not in the valid_formats of this file.");
313 }
315
// Returns an iterator to the current position in the file.
// The iterator is brace-initialised from *this, i.e. it is bound to this file
// object. Declared noexcept.
337 iterator begin() noexcept
338 {
339 return {*this};
340 }
341
// Returns a sentinel for comparison with the iterator.
// The sentinel (std::default_sentinel_t) is value-initialised and carries no
// state. Declared noexcept.
356 sentinel end() noexcept
357 {
358 return {};
359 }
360
// Write a seqan3::record to the file.
//
// r - the record to write; record_t must satisfy detail::record_like.
//
// The seq, id and qual fields are looked up by field id with
// detail::get_or_ignore and handed to write_record in that order.
// get_or_ignore yields a reference to std::ignore for a field the record does
// not contain.
379 template <typename record_t>
380 void push_back(record_t && r)
382 requires detail::record_like<record_t>
384 {
385 write_record(detail::get_or_ignore<field::seq>(r),
386 detail::get_or_ignore<field::id>(r),
387 detail::get_or_ignore<field::qual>(r));
388 }
389
// Write a record in form of a std::tuple to the file.
//
// t - the tuple to write; tuple_t must satisfy tuple_like and must NOT satisfy
//     detail::record_like (records are handled by the other push_back overload).
//
// Unlike the record overload, elements are addressed by position: the index of
// each of seq, id and qual is taken from selected_field_ids::index_of and the
// element at that index is handed to write_record.
411 template <typename tuple_t>
412 void push_back(tuple_t && t)
414 requires tuple_like<tuple_t> && (!detail::record_like<tuple_t>)
416 {
417 // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
418 write_record(detail::get_or_ignore<selected_field_ids::index_of(field::seq)>(t),
419 detail::get_or_ignore<selected_field_ids::index_of(field::id)>(t),
420 detail::get_or_ignore<selected_field_ids::index_of(field::qual)>(t));
421 }
422
// Write a record to the file by passing individual fields.
//
// arg, args - the field values; at least one argument is required.
//
// The arguments are bundled with std::tie (a tuple of lvalue references, so
// nothing is copied) and the bundle is passed on to push_back.
446 template <typename arg_t, typename ...arg_types>
447 void emplace_back(arg_t && arg, arg_types && ... args)
448 {
449 push_back(std::tie(arg, args...));
450 }
451
473 template <std::ranges::input_range rng_t>
478 {
479 for (auto && record : range)
480 push_back(std::forward<decltype(record)>(record));
481 return *this;
482 }
483
511 template <std::ranges::input_range rng_t>
516 {
517 f = range;
518 return f;
519 }
520
522 template <std::ranges::input_range rng_t>
527 {
528 #if defined(__GNUC__) && (__GNUC__ == 9) // an unreported build problem of GCC9
529 for (auto && record : range)
530 f.push_back(std::forward<decltype(record)>(record));
531 #else // ^^^ workaround | regular solution ↓↓↓
532 f = range;
533 #endif
534 return std::move(f);
535 }
537
540
545 {
546 return *secondary_stream;
547 }
549protected:
553
564
569
576
// Write record to format.
//
// seq, id, qual - the three field values of one record; each is passed on
//                 unchanged to the format's write_sequence_record.
//
// Dispatches on the format variant: std::visit invokes the lambda with the
// currently held alternative, which writes the record to *secondary_stream
// using the file's options member.
578 template <typename seq_t, typename id_t, typename qual_t>
579 void write_record(seq_t && seq, id_t && id, qual_t && qual)
580 {
// Debug-build guard: the variant must hold an alternative before it is visited.
581 assert(!format.valueless_by_exception());
582 std::visit([&] (auto & f)
583 {
// The inner brace pair is a plain nested scope around the single call.
584 {
585 f.write_sequence_record(*secondary_stream,
586 options,
587 seq,
588 id,
589 qual);
590 }
591 }, format);
592 }
593
// Befriend iterator so it can access the buffers.
595 friend iterator;
596};
597
604template <output_stream stream_t,
605 sequence_file_output_format file_format>
607 file_format const &)
610
612template <output_stream stream_t,
613 sequence_file_output_format file_format>
615 file_format const &)
618
620template <output_stream stream_t,
621 sequence_file_output_format file_format,
624 file_format const &,
625 selected_field_ids const &)
628
630template <output_stream stream_t,
631 sequence_file_output_format file_format,
634 file_format const &,
635 selected_field_ids const &)
639} // namespace seqan3
Output iterator necessary for providing a range-like interface in output file.
Definition: out_file_iterator.hpp:47
A class for writing sequence files, e.g. FASTA, FASTQ ...
Definition: output.hpp:168
static void stream_deleter_noop(std::basic_ostream< stream_char_type > *)
Stream deleter that does nothing (no ownership assumed).
Definition: output.hpp:561
sequence_file_output & operator=(sequence_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file.
void push_back(record_t &&r)
Write a seqan3::record to the file.
Definition: output.hpp:380
sequence_file_output(stream_t &, file_format const &, selected_field_ids const &) -> sequence_file_output< selected_field_ids, type_list< file_format > >
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
sequence_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:249
sequence_file_output(stream_t &&, file_format const &) -> sequence_file_output< typename sequence_file_output<>::selected_field_ids, type_list< file_format > >
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: output.hpp:213
char stream_char_type
Character type of the stream(s).
Definition: output.hpp:179
std::default_sentinel_t sentinel
The type returned by end().
Definition: output.hpp:215
typename detail::variant_from_tags< valid_formats, detail::sequence_file_output_format_exposer >::type format_type
Type of the format, an std::variant over the valid_formats.
Definition: output.hpp:572
friend iterator
Befriend iterator so it can access the buffers.
Definition: output.hpp:595
sequence_file_output(stream_t &&, file_format const &, selected_field_ids const &) -> sequence_file_output< selected_field_ids, type_list< file_format > >
Deduction guide for given stream, file format and field ids.
friend sequence_file_output operator|(rng_t &&range, sequence_file_output &&f)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: output.hpp:523
format_type format
The actual std::variant holding a pointer to the detected/selected format.
Definition: output.hpp:574
sequence_file_output(sequence_file_output &&)=default
Move construction is defaulted.
void push_back(tuple_t &&t)
Write a record in form of a std::tuple to the file.
Definition: output.hpp:412
stream_ptr_t primary_stream
The primary stream is the user provided stream or the file stream if constructed from filename.
Definition: output.hpp:566
sequence_file_output & operator=(sequence_file_output &&)=default
Move assignment is defaulted.
sequence_file_output(sequence_file_output const &)=delete
Copy construction is explicitly deleted, because you can't have multiple access to the same file.
sequence_file_output(stream_t &, file_format const &) -> sequence_file_output< typename sequence_file_output<>::selected_field_ids, type_list< file_format > >
Deduction guide for given stream and file format.
friend sequence_file_output & operator|(rng_t &&range, sequence_file_output &f)
Write a range of records (or tuples) to the file.
Definition: output.hpp:512
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: output.hpp:177
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: output.hpp:175
~sequence_file_output()=default
Destructor is defaulted.
std::basic_ostream< stream_char_type > & get_stream()
Expose a reference to the secondary stream. [public, but not documented as part of the API].
Definition: output.hpp:544
sequence_file_output_options options
The options are public and its members can be set directly.
Definition: output.hpp:539
sequence_file_output & operator=(rng_t &&range)
Write a range of records (or tuples) to the file.
Definition: output.hpp:474
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: output.hpp:447
void value_type
The value type (void).
Definition: output.hpp:201
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: output.hpp:356
void reference
The reference type (void).
Definition: output.hpp:203
std::vector< char > stream_buffer
A larger (compared to stl default) stream buffer to use when writing to a file.
Definition: output.hpp:552
static void stream_deleter_default(std::basic_ostream< stream_char_type > *ptr)
Stream deleter with default behaviour (ownership assumed).
Definition: output.hpp:563
stream_ptr_t secondary_stream
The secondary stream is a compression layer on the primary or just points to the primary (no compression).
Definition: output.hpp:568
void const_reference
The const reference type (void).
Definition: output.hpp:205
void write_record(seq_t &&seq, id_t &&id, qual_t &&qual)
Write record to format.
Definition: output.hpp:579
sequence_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
sequence_file_output(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:287
sequence_file_output(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: output.hpp:304
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: output.hpp:337
void size_type
The size type (void).
Definition: output.hpp:207
Provides auxiliary data structures and functions for seqan3::record and seqan3::fields.
Provides seqan3::views::elements.
This header includes C++17 filesystem support and imports it into namespace std::filesystem (independent of whether it is marked as "experimental").
Provides the seqan3::sequence_file_format_genbank class.
Provides the seqan3::format_sam.
T forward(T... args)
T get(T... args)
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
auto & get_or_ignore(record< field_types, field_ids > &r)
Access an element in a std::tuple or seqan3::record; return reference to std::ignore if not contained...
Definition: record.hpp:117
void set_format(format_variant_type &format, std::filesystem::path const &file_name)
Sets the file format according to the file name extension.
Definition: misc.hpp:67
auto make_secondary_ostream(std::basic_ostream< char_t > &primary_stream, std::filesystem::path &filename) -> std::unique_ptr< std::basic_ostream< char_t >, std::function< void(std::basic_ostream< char_t > *)> >
Depending on the given filename/extension, create a compression stream or just forward the primary stream.
Definition: misc_output.hpp:42
SEQAN3_CONCEPT fields_specialisation
Auxiliary concept that checks whether a type is a specialisation of seqan3::fields.
Definition: record.hpp:35
@ id
The identifier, usually a string.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
The generic concept for sequence file out formats.
Whether a type behaves like a tuple.
Provides various utility functions.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for output.
The main SeqAn3 namespace.
Definition: cigar_operation_table.hpp:2
Provides the seqan3::detail::out_file_iterator class template.
Provides algorithms for meta programming, parameter packs and seqan3::type_list.
Adaptations of concepts from the Ranges TS.
Provides the seqan3::record template and the seqan3::field enum.
Provides seqan3::detail::record_like.
Provides seqan3::sequence_file_output_format and auxiliary classes.
Provides seqan3::sequence_file_output_options.
Internal class used to expose the actual format interface to write sequence records into the file.
Definition: output_format_concept.hpp:40
Base class to deduce the std::variant type from format tags.
Definition: misc.hpp:30
A class template that holds a choice of seqan3::field.
Definition: record.hpp:128
static constexpr bool contains(field f)
Whether a field is contained in the parameter pack.
Definition: record.hpp:149
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:191
The options type defines various option members that influence the behaviour of all or some formats.
Definition: output_options.hpp:23
Type that contains multiple types.
Definition: type_list.hpp:29
T tie(T... args)
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.
Provides seqan3::views::convert.
T visit(T... args)
Provides seqan3::views::zip.