Rebol3 Code Examplex
Burrows–Wheeler transform
Implement the transform used in data compression algorithms.
Rebol [
title: "Rosetta code: Burrows–Wheeler transform"
file: %Burrows–Wheeler_transform.r3
url: https://rosettacode.org/wiki/Burrows–Wheeler_transform
]
bwt: function [
"Burrows–Wheeler Transform"
input [string!] "The original text to encode"
][
;; Add sentinel markers:
;; STX (^(02)) at the front to mark the start
;; ETX (^(03)) at the end to mark the terminus
;; These ensure unambiguous reconstruction and a fixed sort position.
input: rejoin [#"^(02)" input #"^(03)"]
len: length? input ;; Length of the sentinel-marked string
rotations: clear [] ;; Will hold all cyclic rotations of the string
;; Generate all cyclic rotations:
repeat i len [
append rotations rejoin [
copy at input i ;; from position i to end
copy/part input i - 1 ;; then from start up to (i-1)
]
]
;; Sort all rotations lexicographically
rotations: sort rotations
transformed: copy "" ;; This will be the BWT output (last column)
;; Take the last character from each sorted rotation and accumulate
foreach r rotations [
append transformed last r
]
;; Return the "last column" as the BWT-transformed string
transformed
]
ibwt: function [
"Inverse Burrows–Wheeler Transform"
input [string!] "The BWT-produced last column string"
][
;; Make a copy so we preserve the original argument
input: copy input
len: length? input ;; Number of rows/characters
;; Initialise a table (block of strings), initially containing `len` empty strings
table: make block! len
loop len [append table copy ""]
;; Rebuild rotation table iteratively:
;; Each iteration:
;; 1. Prepend each character of last column to corresponding row
;; 2. Sort the rows
repeat j len [
repeat i len [
insert table/:i input/:i ;; insert char from last column at row start
]
table: sort table ;; keep table lexicographically sorted
]
;; After len iterations, table contains all sorted rotations.
;; The original rotation is the one starting with STX and ending with ETX.
;; With STX as the smallest char, it will always be the first row (table/1).
if table/1/1 == #"^(02)" [
;; Skip STX, copy the rest minus ETX.
;; `next table/1` skips first char (STX)
;; `len - 2` excludes both sentinels
copy/part next table/1 len - 2
]
]
; Example usage
foreach [word expected] [
"banana" "^Cannb^Baa"
"abracadabra" "^Card^Brcaaaabb"
][
print ["Test word:" mold word]
print [" bwt:" mold a: bwt word]
print [" ibwt:" mold b: ibwt a lf]
assert [a == expected]
assert [b == word]
]