Rebol3 Code Example
FASTA format
Parse or emit biological sequence data in FASTA form.
Rebol [
title: "Rosetta code: FASTA format"
file: %FASTA_format.r3
url: https://rosettacode.org/wiki/FASTA_format
]
;; Parses a FASTA-format file in chunks, returning a flat block of [header sequence] pairs.
;; FASTA format alternates between header lines (starting with ">") and sequence data lines.
;;
;; source     - file or URL to read from
;; chunk-size - number of bytes requested from the port per read
;; Returns a block of the form ["header-1" "sequence-1" "header-2" "sequence-2" ...],
;; where each header has its leading ">" removed and each sequence is the
;; concatenation of all data lines that follow that header.
;;
;; Edge cases:
;; - Lines that appear before the first header belong to no record and are skipped.
;; - The last line does not need to end with LF; it is handled like any other line,
;;   so a trailing header without LF yields a record with an empty sequence.
decode-fasta: function [
    source [file! url!] "Source file or URL to read from"
    chunk-size [integer!] "Number of bytes to read per iteration"
][
    out: copy []
    ;; 'val' is the sequence string of the record currently being filled,
    ;; 'rest' is the incomplete line carried over from the previous chunk
    val: rest: none

    ;; Rule that consumes exactly one LF-terminated line and records it.
    ;; Kept in a word so it can be reused for the final unterminated line.
    line-rule: [
        ;; Match everything up to the next newline as one line, then skip the LF
        copy line: to LF skip (
            case [
                ;; Header line - strip the leading ">" and add a new entry to out;
                ;; 'val' now refers to the (still empty) sequence string stored in out
                line/1 == #">" [repend out [remove line val: copy ""]]
                ;; Sequence line - only meaningful once a header has been seen;
                ;; without this guard, data before the first header would try to
                ;; append to none and raise an error
                val [append val line]
            ]
        )
    ]

    port: open/read source ;; open the source as a streaming read port
    ;; Stream through the file chunk by chunk to avoid loading it all into memory
    while [not empty? data: read/string/part port chunk-size][
        ;; If the previous iteration had an incomplete (trailing) line, prepend it
        ;; to the current chunk so it is parsed as a whole line this time
        if rest [insert data rest]
        parse data [
            any line-rule
            ;; Capture any trailing bytes that did not end with LF (incomplete line);
            ;; 'rest' will be prepended to the next chunk at the top of the loop
            copy rest: opt [to end]
        ]
    ]
    close port ;; Be nice and close the port when done reading

    ;; Whatever is left is the last line of the input, just without its LF.
    ;; Terminate it and run it through the same rule, so that it is classified
    ;; as header or sequence data instead of being blindly appended to 'val'.
    if all [rest not empty? rest] [
        parse append rest LF [any line-rule]
    ]

    ;; Insert a new-line marker before every other element (i.e. before each header),
    ;; making the flat block easier to read when printed with 'probe'
    new-line/skip out true 2
]
;; Demo: create a small FASTA sample on disk, then decode it.
;; The file name is held in a word local to this block, so no extra global
;; words are introduced by the demo.
use [sample-file][
    sample-file: %data.fasta
    ;; The sample deliberately has no LF after its last line
    write sample-file
{>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED}
    ;; Decode the sample, asking for 10 bytes per read, and print the resulting block
    probe decode-fasta sample-file 10
]