Rebol3 Code Examplex
Rosetta Code/Rank languages by popularity
Rank programming languages by their Rosetta Code presence.
Rebol [
title: "Rosetta code: Rosetta Code/Rank languages by popularity"
file: %Rosetta_Code-Rank_languages_by_popularity.r3
url: https://rosettacode.org/wiki/Rosetta_Code/Rank_languages_by_popularity
]
get-lang-popularity: function [
"Scrapes Rosetta Code to rank programming languages by number of task solutions"
][
base-url: https://rosettacode.org/wiki/Category:Programming_Languages
que: to block! base-url ;; Initialize queue with the starting URL
data: copy [] ;; Will hold [language pages language pages ...] pairs
;; Process each URL in the queue (handles pagination across subcategory pages)
while [not empty? que][
;; Dequeue and read the next URL
url: take que
print ["Reading:" as-blue url]
html: read/string url
;; Truncate the HTML at the page content section we don't need,
;; so the parser doesn't accidentally match links in the page footer
clear find html <div id="mw-pages">
next-cat: none ;; Will hold the "next page" subcategory token, if present
parse html [
;; Jump to the subcategories section of the page
thru <div id="mw-subcategories">
;; Optionally capture the pagination token for the next batch of subcategories
opt [
thru {<a href="/wiki/Category:Programming_Languages?subcatfrom=}
copy next-cat: to #"^""
]
;; Iterate over every language subcategory entry on this page
any [
thru {<bdi dir="ltr"><a href="}
copy cat-url: to #"^""
thru {title="Category:}
copy cat-ttl: to #"^""
thru {title="Contains }
copy contains: to #"^""
(
;print [pad copy cat-ttl -25 contains]
if parse contains [thru ", " copy pages: to SP to end][
replace/all pages #"," ""
repend data [cat-ttl to integer! pages]
]
)
]
]
;; If a next-page token was found, enqueue the continuation URL
if next-cat [
append que rejoin [base-url %?subcatfrom= next-cat]
]
]
;; Return the sorted [lang pages lang pages ...] block by page count, descending
sort/skip/compare/reverse data 2 2
]
;; --- Display Top 100 ---
data: get-lang-popularity ;; Fetch and rank all languages
n: 1 ;; Rank counter
print "Rosetta Code top 100 languages by tasks solved:"
foreach [language pages] data [
if n > 100 [break] ;; Stop after the top 100
;; Print rank, language name (highlighted), and solution count
print rejoin [pad n -3 ". " as-yellow language " (" pages ")"]
++ n
]
;; Timestamp when the data fetch completed
print ["Data received:" as-green now]