Skip to content

Commit 04cdb3a

Browse files
authored
Merge pull request #245 from martindurant/docs
update some docs and examples
2 parents c48f93a + 5a30a37 commit 04cdb3a

File tree

5 files changed

+49
-21
lines changed

5 files changed

+49
-21
lines changed

docs/source/api.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,11 @@ Sources
4141
.. autosummary::
4242
filenames
4343
from_kafka
44+
from_kafka_batched
4445
from_process
4546
from_textfile
46-
from_socket
47+
from_tcp
48+
from_http_server
4749

4850
DaskStream
4951
----------

docs/source/conf.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,10 @@
5656
# built documents.
5757
#
5858
# The short X.Y version.
59-
version = '0.0.1'
59+
import streamz
60+
version = streamz.__version__
6061
# The full version, including alpha/beta/rc tags.
61-
release = '0.0.1'
62+
release = streamz.__version__
6263

6364
# The language for content autogenerated by Sphinx. Refer to documentation
6465
# for a list of supported languages.

docs/source/index.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,5 +88,4 @@ data streaming systems like `Apache Flink <https://flink.apache.org/>`_,
8888
collections.rst
8989
api.rst
9090
collections-api.rst
91-
dataframe-aggregations.rst
9291
async.rst

examples/network_wordcount.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/env python
2+
""" a recreation of spark-streaming's network_wordcount
3+
4+
https://spark.apache.org/docs/2.2.0/streaming-programming-guide.html#a-quick-example
5+
"""
6+
import time
7+
from streamz import Stream
8+
9+
# absolute port on localhost for now
10+
s = Stream.from_tcp(9999)
11+
s.map(bytes.split).flatten().frequencies().sink(print)
12+
13+
print(
14+
"""In another terminal execute
15+
> nc 127.0.0.1 9999
16+
and then start typing content
17+
"""
18+
)
19+
20+
s.start()
21+
time.sleep(600)

streamz/sources.py

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -52,22 +52,22 @@ class from_textfile(Source):
5252
Parameters
5353
----------
5454
f: file or string
55+
Source of the data. If string, will be opened.
5556
poll_interval: Number
5657
Interval to poll file for new data in seconds
57-
delimiter: str ("\n")
58+
delimiter: str
5859
Character(s) to use to split the data into parts
59-
start: bool (False)
60+
start: bool
6061
Whether to start running immediately; otherwise call stream.start()
6162
explicitly.
62-
from_end: bool (False)
63+
from_end: bool
6364
Whether to begin streaming from the end of the file (i.e., only emit
6465
lines appended after the stream starts).
6566
66-
Example
67-
-------
67+
Examples
68+
--------
6869
>>> source = Stream.from_textfile('myfile.json') # doctest: +SKIP
6970
>>> js.map(json.loads).pluck('value').sum().sink(print) # doctest: +SKIP
70-
7171
>>> source.start() # doctest: +SKIP
7272
7373
Returns
@@ -186,8 +186,8 @@ class from_tcp(Source):
186186
server_kwargs : dict or None
187187
If given, additional arguments to pass to TCPServer
188188
189-
Example
190-
-------
189+
Examples
190+
--------
191191
192192
>>> source = Source.from_tcp(4567) # doctest: +SKIP
193193
"""
@@ -253,9 +253,11 @@ class from_http_server(Source):
253253
server_kwargs : dict or None
254254
If given, set of further parameters to pass on to HTTPServer
255255
256-
Example
257-
-------
256+
Examples
257+
--------
258+
258259
>>> source = Source.from_http_server(4567) # doctest: +SKIP
260+
259261
"""
260262

261263
def __init__(self, port, path='/.*', start=False, server_kwargs=None):
@@ -382,20 +384,22 @@ class from_kafka(Source):
382384
https://docs.confluent.io/current/clients/confluent-kafka-python/#configuration
383385
https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
384386
Examples:
385-
bootstrap.servers: Connection string(s) (host:port) by which to reach Kafka
386-
group.id: Identity of the consumer. If multiple sources share the same
387-
group, each message will be passed to only one of them.
387+
bootstrap.servers, Connection string(s) (host:port) by which to reach
388+
Kafka;
389+
group.id, Identity of the consumer. If multiple sources share the same
390+
group, each message will be passed to only one of them.
388391
poll_interval: number
389392
Seconds that elapse between polling Kafka for new messages
390393
start: bool (False)
391394
Whether to start polling upon instantiation
392395
393-
Example
394-
-------
396+
Examples
397+
--------
395398
396399
>>> source = Stream.from_kafka(['mytopic'],
397400
... {'bootstrap.servers': 'localhost:9092',
398401
... 'group.id': 'streamz'}) # doctest: +SKIP
402+
399403
"""
400404
def __init__(self, topics, consumer_params, poll_interval=0.1, start=False, **kwargs):
401405
self.cpars = consumer_params
@@ -532,12 +536,13 @@ def from_kafka_batched(topic, consumer_params, poll_interval='1s',
532536
start: bool (False)
533537
Whether to start polling upon instantiation
534538
535-
Example
536-
-------
539+
Examples
540+
--------
537541
538542
>>> source = Stream.from_kafka_batched('mytopic',
539543
... {'bootstrap.servers': 'localhost:9092',
540544
... 'group.id': 'streamz'}, npartitions=4) # doctest: +SKIP
545+
541546
"""
542547
if dask:
543548
from distributed.client import default_client

0 commit comments

Comments (0)