Rebol3 Code Examplex
Rosetta_Code/Count_examples
Count task examples in Rosetta Code.
Rebol [
title: "Rosetta code: Rosetta_Code/Count_examples"
file: %Rosetta_Code-Count_examples.r3
url: https://rosettacode.org/wiki/Rosetta_Code/Count_examples
]
;; Import caching and HTML decoding utilities
import thru-cache
import html-entities
;; Set HTTPS request timeout to 30 seconds
system/schemes/https/spec/timeout: 30
get-all-task-titles: function [
"Scrapes all Rosetta Code task titles"
][
;; Build the base API URL for querying the Programming Tasks category
base-url: rejoin [
https://rosettacode.org/w/api.php
"?action=query&format=xml&list=categorymembers&cmlimit=500"
"&cmtitle=Category:Programming_Tasks"
]
que: to block! base-url ;; Initialize queue with the starting URL
titles: copy [] ;; Will hold task titles as [pageid title]
;; Process each URL in the queue (handles pagination across subcategory pages)
while [not empty? que][
;; Dequeue and read the next URL
url: take que
print ["Reading:" as-blue find/tail url "&cmtitle="]
xml: read-thru/string url
cmcontinue: none ;; Will hold the "next page" subcategory token, if present
parse xml [
;; Try to extract the pagination continuation token
opt [
thru {<continue cmcontinue="}
copy cmcontinue: to #"^""
]
thru <categorymembers>
;; Extract pageid and title from each <cm> element
any [
thru {<cm pageid="} copy pageid: to #"^""
thru { title="} copy title: to #"^""
(
;; Store pageid as integer alongside title
repend titles [to integer! pageid title]
)
]
]
;; If a cmcontinue token was found, enqueue the continuation URL
if cmcontinue [
append que rejoin [base-url "&cmcontinue=" cmcontinue]
]
]
titles
]
get-task-examples: function[
"Return language names with solution for a given task"
task
][
;; Decode HTML entities and re-encode for use in a URL
task: enhex decode 'html-entities task
url: join https://rosettacode.org/w/index.php?action=raw&title= task
data: read-thru/string url
;; Collect all language header names from the raw wiki markup
parse data [
collect any [thru "=={{header|" keep to "}}=="]
]
]
;; Fetch all task titles from Rosetta Code
titles: get-all-task-titles
;; Count language examples per task
counts: make block! length? titles
foreach [pageid title] titles [
try/with [
langs: get-task-examples title
num: length? langs
print [title as-green num]
;; Store [count title] pairs for later sorting
repend counts [num title]
] :print ;; On error, print the error and continue
]
;; Sort by example count, descending
sort/skip/reverse counts 2
;; Display the 10 tasks with the most language examples
print as-yellow "^/Top 10 tasks with the most examples:"
loop 10 [
print [counts/2 "has" counts/1 "examples."]
counts: skip counts 2
]
;; Seek to the last 20 entries (10 pairs) to find the least-covered tasks
print as-yellow "^/Top 10 tasks with the minimum examples:"
counts: skip tail counts -20
loop 10 [
print [counts/2 "has" counts/1 "examples."]
counts: skip counts 2
]
;; Timestamp when the data fetch completed
print ["^/Data received:" as-green now]