Skip to content

Commit fa6cdba

Browse files
authored
Bring back SeqT (#546)
1 parent 46a1e9d commit fa6cdba

File tree

7 files changed

+1772
-9
lines changed

7 files changed

+1772
-9
lines changed

docsrc/content/type-seqt.fsx

Lines changed: 154 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,165 @@
11
(*** hide ***)
22
// This block of code is omitted in the generated HTML documentation. Use
33
// it to define helpers that you do not want to show in the documentation.
4+
5+
#r @"../../src/FSharpPlus/bin/Release/netstandard2.0/FSharpPlus.dll"
6+
7+
// For some reason AsyncDownloadString is not found during doc build. The following is a dumb implementation just to make the compiler happy.
8+
// TODO find out why.
9+
10+
type System.Net.WebClient with member wc.AsyncDownloadString (uri: System.Uri) = async { return wc.DownloadString uri }
11+
412
(**
5-
TO-DO Add some docs here !
6-
=========================
13+
SeqT<Monad<bool>, 'T>
14+
=====================
15+
16+
This is the Monad Transformer for `seq<'T>`, so it adds sequencing to existing monads by composing them with `seq<'T>`.
17+
18+
Any monad can be composed, but a very typical usage is when combined with `Async` or `Task`, which gives rise to what's called async sequences.
19+
20+
Therefore the [AsyncSeq](https://github.com/fsprojects/FSharp.Control.AsyncSeq) library can be considered a specialization of this monad in Async.
21+
22+
The original post from AsyncSeq can be found [here](http://tomasp.net/blog/async-sequences.aspx) and we can run those examples with `SeqT` by adapting the code.
23+
24+
In order to do so we need to be aware of the design differences of both implementations.
25+
26+
<style>
27+
body #fsdocs-content table, th, td { border: 1px solid black;border-collapse: collapse; }
28+
body #fsdocs-content table code { word-break: normal; }
29+
</style>
30+
31+
| **AsyncSeq** | **SeqT** | **Notes** |
32+
|:------------------------------|:--------------------------------------|:---------:|
33+
|`AsyncSeq<'T>` |`SeqT<Async<bool>, 'T>` | |
34+
|`asyncSeq { .. }` |`monad.plus { .. }` | At some point it needs to be inferred as `SeqT<Async<bool>, 'T>`, or it can be specified with type parameters: `monad<SeqT<Async<bool>, 'T>>.plus` |
35+
|`let! x = y` |`let! x = SeqT.lift y` | No auto lifting. Lifting should be explicit. |
36+
|`do! x` |`do! SeqT.lift x` | '' |
37+
|`for x in s` |`let! x = s` | When `s: SeqT` otherwise `for` is still ok with regular sequences. |
38+
|`AsyncSeq.[function]` |`SeqT.[function]` | See differences in functions below. |
39+
|`AsyncSeq.[function]Async` |`SeqT.[function]M` | '' |
40+
|`AsyncSeq.skip` |`SeqT.drop` | `.skip` is available but, consistent with F# collections, it throws when the sequence doesn't have enough elements. |
41+
|`AsyncSeq.take` |`SeqT.truncate` | `.take` is available but, consistent with F# collections, it throws when the sequence doesn't have enough elements. |
42+
|`AsyncSeq.toBlockingSequence` |`SeqT.run >> Async.RunSynchronously` | Not really the same but semantically equivalent. |
43+
|`AsyncSeq.toListAsync` |`SeqT.runAsList` | |
44+
|`AsyncSeq.toArrayAsync` |`SeqT.runAsArray` | |
45+
|`AsyncSeq.zipWith` |`SeqT.map2` | Aligned with F# collections. |
46+
|`AsyncSeq.zipWithAsync` |`SeqT.map2M` | '' |
47+
|`AsyncSeq.ofObservable` |`Observable.toAsyncSeq` |`.toTaskSeq` is also available. |
48+
|`AsyncSeq.toObservable` |`Observable.ofAsyncSeq` |`.ofTaskSeq` is also available. |
49+
50+
751
852
Examples
953
--------
1054
*)
1155

56+
#r "nuget: FSharpPlus,1.3.0-CI02744" // still as pre-release
57+
58+
open System
59+
open System.Net
60+
open FSharpPlus
61+
open FSharpPlus.Data
62+
63+
let urls =
64+
[ "http://bing.com"; "http://yahoo.com";
65+
"http://google.com"; "http://msn.com"; ]
66+
67+
// Asynchronous sequence that returns URLs and lengths
68+
// of the downloaded HTML. Web pages from a given list
69+
// are downloaded asynchronously in sequence.
70+
let pages: SeqT<_, _> = monad.plus {
71+
use wc = new WebClient ()
72+
for url in urls do
73+
try
74+
let! html = wc.AsyncDownloadString (Uri url) |> SeqT.lift
75+
yield url, html.Length
76+
with _ ->
77+
yield url, -1 }
78+
79+
80+
// Print URL of pages that are smaller than 100k
81+
let printPages =
82+
pages
83+
|> SeqT.filter (fun (_, len) -> len < 100000)
84+
|> SeqT.map fst
85+
|> SeqT.iter (printfn "%s")
86+
87+
printPages |> Async.Start
88+
89+
90+
(**
91+
The samples above and below come from the [original AsyncSeq post](http://tomasp.net/blog/async-sequences.aspx) and they can easily be switched to task sequences (taskSeq): simply add `|> Async.StartAsTask` between `wc.AsyncDownloadString (Uri url)` and `|> SeqT.lift`, then run everything except the `printPages |> Async.Start` line.
92+
*)
93+
94+
// A simple webcrawler
95+
96+
#r "nuget: FSharpPlus,1.3.0-CI02744"
97+
#r "nuget: HtmlAgilityPack"
98+
99+
open System
100+
open System.Net
101+
open System.Text.RegularExpressions
102+
open HtmlAgilityPack
103+
open FSharp.Control
104+
105+
open FSharpPlus
106+
open FSharpPlus.Data
107+
108+
// ----------------------------------------------------------------------------
109+
// Helper functions for downloading documents, extracting links etc.
110+
111+
/// Asynchronously download the document and parse the HTML
112+
let downloadDocument url = async {
113+
try let wc = new WebClient ()
114+
let! html = wc.AsyncDownloadString (Uri url)
115+
let doc = new HtmlDocument ()
116+
doc.LoadHtml html
117+
return Some doc
118+
with _ -> return None }
119+
120+
/// Extract all links from the document that start with "https://"
121+
let extractLinks (doc:HtmlDocument) =
122+
try
123+
[ for a in doc.DocumentNode.SelectNodes ("//a") do
124+
if a.Attributes.Contains "href" then
125+
let href = a.Attributes.["href"].Value
126+
if href.StartsWith "https://" then
127+
let endl = href.IndexOf '?'
128+
yield if endl > 0 then href.Substring(0, endl) else href ]
129+
with _ -> []
130+
131+
/// Extract the <title> of the web page
132+
let getTitle (doc: HtmlDocument) =
133+
let title = doc.DocumentNode.SelectSingleNode "//title"
134+
if title <> null then title.InnerText.Trim () else "Untitled"
135+
136+
// ----------------------------------------------------------------------------
137+
// Basic crawling - crawl web pages and follow just one link from every page
138+
139+
/// Crawl the internet starting from the specified page
140+
/// From each page follow the first not-yet-visited page
141+
let rec randomCrawl url =
142+
let visited = new System.Collections.Generic.HashSet<_> ()
12143

13-
#r @"../../src/FSharpPlus/bin/Release/net45/FSharpPlus.dll"
144+
// Visits page and then recursively visits all referenced pages
145+
let rec loop url = monad.plus {
146+
if visited.Add(url) then
147+
let! doc = downloadDocument url |> SeqT.lift
148+
match doc with
149+
| Some doc ->
150+
// Yield url and title as the next element
151+
yield url, getTitle doc
152+
// For every link, yield all referenced pages too
153+
for link in extractLinks doc do
154+
yield! loop link
155+
| _ -> () }
156+
loop url
14157

15-
open FSharpPlus
158+
// Use SeqT combinators to print the titles of the first 10
159+
// web sites that are from other domains than en.wikipedia.org
160+
randomCrawl "https://en.wikipedia.org/wiki/Main_Page"
161+
|> SeqT.filter (fun (url, title) -> url.Contains "en.wikipedia.org" |> not)
162+
|> SeqT.map snd
163+
|> SeqT.take 10
164+
|> SeqT.iter (printfn "%s")
165+
|> Async.Start

0 commit comments

Comments
 (0)