
Commit 11deed0

Updating Quickstart with new fields and adding Spark one-liner

1 parent: 75fa088

File tree

9 files changed: +474, -187 lines

docs/quick-start/spark.md

Lines changed: 321 additions & 42 deletions
Large diffs are not rendered by default.

docs/quick-start/storm.md

Lines changed: 41 additions & 37 deletions
````diff
@@ -43,7 +43,7 @@ mkdir -p $BULLET_HOME/backend/storm
 mkdir -p $BULLET_HOME/service
 mkdir -p $BULLET_HOME/ui
 cd $BULLET_HOME
-curl -LO https://github.com/bullet-db/bullet-db.github.io/releases/download/src/examples_artifacts.tar.gz
+curl -LO https://github.com/bullet-db/bullet-db.github.io/releases/download/v0.5.2/examples_artifacts.tar.gz
 tar -xzf examples_artifacts.tar.gz
 export BULLET_EXAMPLES=$BULLET_HOME/bullet-examples
 ```
````
````diff
@@ -125,7 +125,7 @@ Visit the UI and see if the topology is up. You should see the ```DataSource```
 
 !!! note "Where is this data coming from?"
 
-    This data is randomly generated by the [custom Storm spout](https://github.com/bullet-db/bullet-db.github.io/blob/src/examples/storm/src/main/java/com/yahoo/bullet/storm/examples/RandomSpout.java) that is in the example topology you just launched. In practice, your spout would read from an actual data source such as Kafka instead. See [below](#storm-topology) for more details about this random data spout.
+    This data is randomly generated by the [custom Storm spout](https://github.com/bullet-db/bullet-db.github.io/blob/src/examples/storm/src/main/java/com/yahoo/bullet/storm/examples/RandomSpout.java) that is in the example topology you just launched. In practice, your spout would read from an actual data source such as Kafka. See [below](#storm-topology) for more details about this random data spout.
 
 ### Setting up the Bullet Web Service
 
````
````diff
@@ -271,43 +271,47 @@ This method above emits the tuples. The Storm framework calls this method. This
 When the spout emits the randomly generated tuple, it attaches a ```DUMMY_ID``` to it. In Storm terms, this is a message ID. By adding a message ID, this tuple can be made to flow reliably. The Bullet component that receives this tuple (the Filter bolt) acknowledges or "acks" it. If the tuple does not make it to the Filter bolt within a configured timeout window, Storm will call the ```fail(Object messageId)``` method on the spout. This particular spout does not define one, hence the ```DUMMY_ID```. If your source of data can identify records uniquely and you can re-emit them on a fail, you should attach that actual ID in place of the ```DUMMY_ID```.
 
 ```java
-private BulletRecord generateRecord() {
-    BulletRecord record = new BulletRecord();
-    String uuid = UUID.randomUUID().toString();
-
-    record.setString(STRING, uuid);
-    record.setLong(LONG, (long) generatedThisPeriod);
-    record.setDouble(DOUBLE, random.nextDouble());
-    record.setString(TYPE, STRING_POOL[random.nextInt(STRING_POOL.length)]);
-    record.setLong(DURATION, System.currentTimeMillis() % INTEGER_POOL[random.nextInt(INTEGER_POOL.length)]);
-
-    Map<String, Boolean> booleanMap = new HashMap<>(4);
-    booleanMap.put(uuid.substring(0, 8), random.nextBoolean());
-    booleanMap.put(uuid.substring(9, 13), random.nextBoolean());
-    booleanMap.put(uuid.substring(14, 18), random.nextBoolean());
-    booleanMap.put(uuid.substring(19, 23), random.nextBoolean());
-    record.setBooleanMap(BOOLEAN_MAP, booleanMap);
-
-    Map<String, Long> statsMap = new HashMap<>(4);
-    statsMap.put(PERIOD_COUNT, periodCount);
-    statsMap.put(RECORD_NUMBER, periodCount * maxPerPeriod + generatedThisPeriod);
-    statsMap.put(NANO_TIME, System.nanoTime());
-    statsMap.put(TIMESTAMP, System.currentTimeMillis());
-    record.setLongMap(STATS_MAP, statsMap);
-
-    Map<String, String> randomMapA = new HashMap<>(2);
-    Map<String, String> randomMapB = new HashMap<>(2);
-    randomMapA.put(RANDOM_MAP_KEY_A, STRING_POOL[random.nextInt(STRING_POOL.length)]);
-    randomMapA.put(RANDOM_MAP_KEY_B, STRING_POOL[random.nextInt(STRING_POOL.length)]);
-    randomMapB.put(RANDOM_MAP_KEY_A, STRING_POOL[random.nextInt(STRING_POOL.length)]);
-    randomMapB.put(RANDOM_MAP_KEY_B, STRING_POOL[random.nextInt(STRING_POOL.length)]);
-    record.setListOfStringMap(LIST, asList(randomMapA, randomMapB));
-
-    return record;
-}
+private Map<String, String> makeRandomMap() {
+    Map<String, String> randomMap = new HashMap<>(2);
+    randomMap.put(RANDOM_MAP_KEY_A, STRING_POOL[random.nextInt(STRING_POOL.length)]);
+    randomMap.put(RANDOM_MAP_KEY_B, STRING_POOL[random.nextInt(STRING_POOL.length)]);
+    return randomMap;
+}
+
+private BulletRecord generateRecord() {
+    BulletRecord record = new AvroBulletRecord();
+    String uuid = UUID.randomUUID().toString();
+
+    record.setString(STRING, uuid);
+    record.setLong(LONG, (long) generatedThisPeriod);
+    record.setDouble(DOUBLE, random.nextDouble());
+    record.setDouble(GAUSSIAN, random.nextGaussian());
+    record.setString(TYPE, STRING_POOL[random.nextInt(STRING_POOL.length)]);
+    record.setLong(DURATION, System.currentTimeMillis() % INTEGER_POOL[random.nextInt(INTEGER_POOL.length)]);
+
+    record.setStringMap(SUBTYPES_MAP, makeRandomMap());
+
+    Map<String, Boolean> booleanMap = new HashMap<>(4);
+    booleanMap.put(uuid.substring(0, 8), random.nextBoolean());
+    booleanMap.put(uuid.substring(9, 13), random.nextBoolean());
+    booleanMap.put(uuid.substring(14, 18), random.nextBoolean());
+    booleanMap.put(uuid.substring(19, 23), random.nextBoolean());
+    record.setBooleanMap(BOOLEAN_MAP, booleanMap);
+
+    Map<String, Long> statsMap = new HashMap<>(4);
+    statsMap.put(PERIOD_COUNT, periodCount);
+    statsMap.put(RECORD_NUMBER, periodCount * maxPerPeriod + generatedThisPeriod);
+    statsMap.put(NANO_TIME, System.nanoTime());
+    statsMap.put(TIMESTAMP, System.currentTimeMillis());
+    record.setLongMap(STATS_MAP, statsMap);
+
+    record.setListOfStringMap(LIST, asList(makeRandomMap(), makeRandomMap()));
+
+    return record;
+}
 ```
 
-This method generates some fields randomly and inserts them into a BulletRecord. Note that the BulletRecord is typed and all data must be inserted with the proper types.
+This ```generateRecord``` method generates some fields randomly and inserts them into a BulletRecord. Note that the BulletRecord is typed and all data must be inserted with the proper types.
 
 If you put Bullet on your data, you will need to write a spout (or a topology if your reading is complex) that reads from your data source and emits BulletRecords, placing the fields you wish to be query-able into the record, similar to this example.
 
````

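The message-ID discussion in the hunk above is the heart of reliable delivery, so a concrete sketch may help: a spout that keys each tuple by a real record ID, remembers what it emitted, and re-emits on `fail`. This is illustration only, not part of this commit; `SourceEvent` and `readFromSource` are hypothetical stand-ins for your actual source (a Kafka consumer, say):

```java
// Illustrative sketch only, NOT part of this commit or the Bullet examples.
// SourceEvent and readFromSource() are hypothetical stand-ins for your real source.
import java.util.HashMap;
import java.util.Map;

import com.yahoo.bullet.record.BulletRecord;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

public abstract class ReliableRecordSpout extends BaseRichSpout {
    // Hypothetical: one event from your source (e.g., a single Kafka message).
    public static final class SourceEvent {
        final String id;            // unique, so it can double as the Storm message ID
        final BulletRecord record;  // fields already inserted with the proper types
        public SourceEvent(String id, BulletRecord record) { this.id = id; this.record = record; }
    }

    private SpoutOutputCollector collector;
    // Emitted records awaiting an ack from the Filter bolt, keyed by their real ID.
    private Map<Object, BulletRecord> pending;

    // Hypothetical: poll your data source; return null if nothing is available.
    protected abstract SourceEvent readFromSource();

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        this.pending = new HashMap<>();
    }

    @Override
    public void nextTuple() {
        SourceEvent event = readFromSource();
        if (event == null) {
            return;
        }
        pending.put(event.id, event.record);
        // A real message ID (instead of DUMMY_ID) makes this tuple flow reliably.
        collector.emit(new Values(event.record), event.id);
    }

    @Override
    public void ack(Object messageId) {
        pending.remove(messageId);  // the Filter bolt got it; forget the record
    }

    @Override
    public void fail(Object messageId) {
        BulletRecord record = pending.get(messageId);
        if (record != null) {
            collector.emit(new Values(record), messageId);  // re-emit on timeout/failure
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("record"));
    }
}
```

A production spout would additionally bound the `pending` map and give up on records that fail repeatedly.
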
docs/ui/usage.md

Lines changed: 2 additions & 2 deletions
```diff
@@ -2,10 +2,10 @@
 
 The UI should (hopefully) be self-explanatory. Any particular section that requires additional information has the ![info](../img/info.png) icon next to it. Clicking this will display information relevant to that section.
 
-The interactions in this page are running on the topology that was set up in the [Quick Start on Storm](../quick-start/storm.md). Recall that the example backend is configured to produce *20 data records every 101 ms*.
+The interactions in this page are running on the topology that was set up in the [Quick Start on Storm](../quick-start/storm.md). Recall that that example backend is configured to produce *20 data records every 101 ms*.
 
 !!! note "NOTE: Some of these videos use an old version of the Bullet UI"
-    We are currently in progress adding new videos with windowing, etc.
+    We are currently adding new videos that cover windowing and the other new features in the latest UI.
 
 ## Landing page
 
```

examples/install-all-spark.sh

Lines changed: 61 additions & 79 deletions
```diff
@@ -2,9 +2,9 @@
 
 set -euo pipefail
 
-BULLET_EXAMPLES_VERSION=0.4.0
-BULLET_UI_VERSION=0.4.0
-BULLET_WS_VERSION=0.2.1
+BULLET_EXAMPLES_VERSION=0.5.1
+BULLET_UI_VERSION=0.5.0
+BULLET_WS_VERSION=0.3.0
 BULLET_KAFKA_VERSION=0.3.0
 KAFKA_VERSION=0.11.0.1
 SPARK_VERSION=2.2.1
```
```diff
@@ -25,6 +25,8 @@ print_versions() {
     println "Bullet Examples: ${BULLET_EXAMPLES_VERSION}"
     println "Bullet Web Service: ${BULLET_WS_VERSION}"
     println "Bullet UI: ${BULLET_UI_VERSION}"
+    println "Bullet Kafka: ${BULLET_KAFKA_VERSION}"
+    println "Spark: ${SPARK_VERSION}"
     println "Kafka: ${KAFKA_VERSION}"
     println "NVM: ${NVM_VERSION}"
     println "Node.js: ${NODE_VERSION}"
```
```diff
@@ -51,9 +53,13 @@ export_vars() {
 
     println "Exporting some variables..."
     export BULLET_HOME="${PWD}/bullet-quickstart"
-    export BULLET_EXAMPLES=$BULLET_HOME/bullet-examples
-    export BULLET_DOWNLOADS=$BULLET_HOME/bullet-downloads
-    export BULLET_SPARK=${BULLET_HOME}/backend/spark
+    export BULLET_EXAMPLES="$BULLET_HOME/bullet-examples"
+    export BULLET_DOWNLOADS="$BULLET_HOME/bullet-downloads"
+    export BULLET_SPARK="${BULLET_HOME}/backend/spark"
+    export KAFKA_DISTRO="kafka_2.12-${KAFKA_VERSION}"
+    export KAFKA_DIR="${BULLET_HOME}/pubsub"
+    export SPARK_DISTRO="spark-${SPARK_VERSION}-bin-hadoop2.7"
+    export SPARK_DIR="${BULLET_SPARK}/${SPARK_DISTRO}"
     println "Done!"
 }
 
```

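Taken together, the exports above imply roughly this layout under `$BULLET_HOME` once the script has run (a sketch inferred from the variables, not output of the script):

```
bullet-quickstart/
├── backend/spark/      # BULLET_SPARK; the Spark distro unpacks to SPARK_DIR inside
├── bullet-downloads/   # BULLET_DOWNLOADS: all fetched tarballs and jars
├── bullet-examples/    # BULLET_EXAMPLES
├── pubsub/             # KAFKA_DIR: the Kafka distro plus the bullet-kafka fat jar
├── service/            # the Bullet Web Service
└── ui/                 # the Bullet UI
```
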
```diff
@@ -77,51 +83,48 @@ install_bullet_examples() {
 }
 
 install_kafka() {
-    local KAFKA="kafka_2.12-${KAFKA_VERSION}"
-    local PUBSUB="${BULLET_HOME}/pubsub/"
-
     println "Downloading Kafka ${KAFKA_VERSION}..."
-    download "https://archive.apache.org/dist/kafka/${KAFKA_VERSION}" "${KAFKA}.tgz"
+    download "https://archive.apache.org/dist/kafka/${KAFKA_VERSION}" "${KAFKA_DISTRO}.tgz"
 
-    println "Installing Kafka ..."
-    tar -xzf ${BULLET_DOWNLOADS}/${KAFKA}.tgz -C ${PUBSUB}
-    export KAFKA_DIR=${PUBSUB}${KAFKA}
+    println "Installing Kafka to ${KAFKA_DIR}..."
+    tar -xzf ${BULLET_DOWNLOADS}/${KAFKA_DISTRO}.tgz -C ${KAFKA_DIR}
 
     println "Done!"
 }
 
 install_bullet_kafka() {
     local BULLET_KAFKA="bullet-kafka-${BULLET_KAFKA_VERSION}-fat.jar"
-    local PUBSUB="${BULLET_HOME}/pubsub/"
 
     println "Downloading bullet-kafka ${BULLET_KAFKA_VERSION}..."
     download "http://jcenter.bintray.com/com/yahoo/bullet/bullet-kafka/${BULLET_KAFKA_VERSION}" "${BULLET_KAFKA}"
-    cp ${BULLET_DOWNLOADS}/${BULLET_KAFKA} ${PUBSUB}${BULLET_KAFKA}
-    export BULLET_KAFKA_JAR=${PUBSUB}${BULLET_KAFKA}
+    cp ${BULLET_DOWNLOADS}/${BULLET_KAFKA} ${BULLET_HOME}/pubsub/${BULLET_KAFKA}
 
     println "Done!"
 }
 
 launch_kafka() {
+    local KAFKA_DIR=${KAFKA_DIR}/${KAFKA_DISTRO}
     println "Launching Zookeeper..."
     $KAFKA_DIR/bin/zookeeper-server-start.sh $KAFKA_DIR/config/zookeeper.properties &
-    sleep 3
+    println "Sleeping for 10s to ensure Zookeeper is up..."
+    sleep 10
 
     println "Launching Kafka..."
     $KAFKA_DIR/bin/kafka-server-start.sh $KAFKA_DIR/config/server.properties &
-
-    sleep 3
+    println "Sleeping for 10s to ensure Kafka is up..."
+    sleep 10
     println "Done!"
 }
 
 create_topics() {
+    local KAFKA_DIR=${KAFKA_DIR}/${KAFKA_DISTRO}
     set +e
-    println "Creating kafka topics ${KAFKA_TOPIC_REQUESTS} and ${KAFKA_TOPIC_RESPONSES}..."
+    println "Creating Kafka topics ${KAFKA_TOPIC_REQUESTS} and ${KAFKA_TOPIC_RESPONSES}..."
     $KAFKA_DIR/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic ${KAFKA_TOPIC_REQUESTS}
     $KAFKA_DIR/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic ${KAFKA_TOPIC_RESPONSES}
     set -e
 
-    sleep 3
+    println "Sleeping for 10s to ensure Kafka topics are created..."
+    sleep 10
     println "Done!"
 }
 
```
```diff
@@ -145,62 +148,57 @@ launch_web_service() {
 
     println "Launching Bullet Web Service..."
     cd "${BULLET_SERVICE_HOME}"
-    java -Dloader.path=${BULLET_KAFKA_JAR} -jar ${BULLET_WS_JAR} \
+    java -Dloader.path=${BULLET_HOME}/pubsub/bullet-kafka-${BULLET_KAFKA_VERSION}-fat.jar -jar ${BULLET_WS_JAR} \
         --bullet.pubsub.config=${BULLET_SERVICE_HOME}/example_kafka_pubsub_config.yaml \
         --bullet.schema.file=${BULLET_SERVICE_HOME}/example_columns.json \
         --server.port=9999 \
-        --logging.path=. \
-        --logging.file=log.txt &> log.txt &
+        --logging.path=${BULLET_SERVICE_HOME} \
+        --logging.file=log.txt &> ${BULLET_SERVICE_HOME}/log.txt &
 
     println "Sleeping for 15 s to ensure Bullet Web Service is up..."
     sleep 15
 
-    println "Testing the Web Service: Getting column schema..."
+    println "Getting one random record from Bullet through the Web Service..."
+    println "curl -s -H 'Content-Type: text/plain' -X POST -d '{\"aggregation\": {\"size\": 1}}' http://localhost:9999/api/bullet/sse-query"
+    println ""
+    println "Getting column schema from the Web Service..."
     println ""
     curl -s http://localhost:9999/api/bullet/columns
-    println "Finished Bullet Web Service test"
+    println "Finished Bullet Web Service test!"
 }
 
 install_spark() {
-    local SPARK="spark-${SPARK_VERSION}-bin-hadoop2.7.tgz"
-
     println "Downloading Spark version ${SPARK_VERSION}..."
-    download "http://www-us.apache.org/dist/spark/spark-${SPARK_VERSION}" "${SPARK}"
-
-    println "Installing Spark version ${SPARK_VERSION}..."
-    cp ${BULLET_DOWNLOADS}/${SPARK} ${BULLET_HOME}/backend/spark/
-
-    tar -xzf "${BULLET_HOME}/backend/spark/${SPARK}" -C "${BULLET_HOME}/backend/spark/"
-    export SPARK_DIR="${BULLET_HOME}/backend/spark/spark-${SPARK_VERSION}-bin-hadoop2.7"
+    download "http://www-us.apache.org/dist/spark/spark-${SPARK_VERSION}" "${SPARK_DISTRO}.tgz"
 
+    println "Installing Spark version ${SPARK_VERSION}..."
+    cp ${BULLET_DOWNLOADS}/${SPARK_DISTRO}.tgz ${BULLET_SPARK}/
+    tar -xzf "${BULLET_SPARK}/${SPARK_DISTRO}.tgz" -C ${BULLET_SPARK}
     println "Done!"
 }
 
-install_bullet_spark() {
-    cp $BULLET_HOME/bullet-examples/backend/spark/* $BULLET_SPARK
-    # Remove this 88 - THIS STILL NEEDS to be implemented - download the thing (it's not available online yet because we haven't released this version yet):
-    # Something like this: curl -Lo bullet-spark.jar http://jcenter.bintray.com/com/yahoo/bullet/bullet-spark/0.1.1/bullet-spark-0.1.1-standalone.jar
-}
-
 launch_bullet_spark() {
+    local BULLET_KAFKA_JAR=bullet-kafka-${BULLET_KAFKA_VERSION}-fat.jar
+
+    println "Copying Bullet Spark configuration and artifacts..."
+    cp $BULLET_HOME/bullet-examples/backend/spark/* $BULLET_SPARK
     cd ${BULLET_SPARK}
-    println "Launching bullet-spark..."
+    println "Launching Bullet Spark..."
+    println "=============================================================================="
     ${SPARK_DIR}/bin/spark-submit \
         --master local[10] \
         --class com.yahoo.bullet.spark.BulletSparkStreamingMain \
-        --driver-class-path $BULLET_SPARK/bullet-spark.jar:${BULLET_KAFKA_JAR}:$BULLET_SPARK/bullet-spark-example.jar \
+        --driver-class-path $BULLET_SPARK/bullet-spark.jar:${BULLET_HOME}/pubsub/${BULLET_KAFKA_JAR}:$BULLET_SPARK/bullet-spark-example.jar \
         $BULLET_SPARK/bullet-spark.jar \
         --bullet-spark-conf=$BULLET_SPARK/bullet_spark_settings.yaml &> log.txt &
 
-    println "Sleeping for 15 s to ensure bullet-spark is up and running..."
+    println "Sleeping for 15 s to ensure Bullet Spark is up and running..."
+    println "=============================================================================="
     sleep 15
 
-    println "Done! You should now be able to query Bullet through the web service. Try this:"
-    println "curl -s -H 'Content-Type: text/plain' -X POST -d '{\"aggregation\": {\"size\": 1}}' http://localhost:9999/api/bullet/sse-query"
+    println "Done!"
 }
 
-
-
 install_node() {
     # NVM unset var bug
     set +u
```
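With the backend and the Web Service both up, the end-to-end check is the query the script itself prints; the command below is taken verbatim from the `println` above:

```bash
# Ask Bullet for one random record through the Web Service.
curl -s -H 'Content-Type: text/plain' -X POST \
     -d '{"aggregation": {"size": 1}}' \
     http://localhost:9999/api/bullet/sse-query
```
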
```diff
@@ -250,18 +248,19 @@
 }
 
 cleanup() {
+    local KAFKA_INSTALL_DIR=${KAFKA_DIR}/${KAFKA_DISTRO}
     set +e
 
     pkill -f "[e]xpress-server.js"
     pkill -f "[e]xample_kafka_pubsub_config.yaml"
     pkill -f "[b]ullet-spark"
-    ${KAFKA_DIR}/bin/kafka-server-stop.sh
-    ${KAFKA_DIR}/bin/zookeeper-server-stop.sh
+    ${KAFKA_INSTALL_DIR}/bin/kafka-server-stop.sh
+    ${KAFKA_INSTALL_DIR}/bin/zookeeper-server-stop.sh
 
     sleep 3
 
     rm -rf "${BULLET_EXAMPLES}" "${BULLET_HOME}/backend" "${BULLET_HOME}/service" \
-           "${BULLET_HOME}/ui" "${BULLET_HOME}/pubsub" /tmp/dev-storm-zookeeper
+           "${BULLET_HOME}/ui" "${BULLET_HOME}/pubsub"
 
     set -e
 }
```
```diff
@@ -275,10 +274,11 @@ teardown() {
 unset_all() {
     unset -f print_versions println download export_vars setup \
             install_bullet_examples \
-            install_storm launch_storm launch_bullet_storm \
-            launch_bullet_web_service \
+            install_kafka install_bullet_kafka launch_kafka create_topics \
+            install_spark launch_bullet_spark \
+            install_web_service launch_web_service \
             install_node launch_bullet_ui \
-            cleanup teardown unset_all launch
+            cleanup teardown unset_all launch clean
 }
 
 launch() {
```
```diff
@@ -288,42 +288,24 @@
     teardown
 
     setup
-
-    # install_bullet_examples
-    # <------------- Remove this 88 - the above line needs to be uncommented and all the below stuff should be removed once this artifact actualy exists on the git cloud or whatever
-    cp ~/bullet/bullet-db.github.io/examples/examples_artifacts.tar.gz ${BULLET_DOWNLOADS}/
-    tar -xzf "${BULLET_DOWNLOADS}/examples_artifacts.tar.gz" -C "${BULLET_HOME}" # <------------ Remove this 88 - remove this line and the one above it once the artifact is actulaly on github
+    install_bullet_examples
 
     install_kafka
     install_bullet_kafka
     launch_kafka
     create_topics
 
-    install_web_service
-    launch_web_service
-
     install_spark
-    # install_bullet_spark
-    # <------------- Remove this 88 - the above line needs to be uncommented and all the below stuff should be removed once this artifact actualy exists on the git cloud or whatever
-    cp $BULLET_HOME/bullet-examples/backend/spark/* $BULLET_SPARK # <------------ Remove this 88
-    cp ~/bullet/bullet-spark/target/bullet-spark-0.1.1-SNAPSHOT-standalone.jar $BULLET_SPARK/bullet-spark.jar # <------------ Remove this 88
-
     launch_bullet_spark
 
-    # Remove this 88 - deal with the following two lines:
-    # Now do the UI stuff once the new UI is ready
-    # ALSO - DON'T FORGET! The teardown stuff doesn't work unless you run the whole script (the "else" block at the bottom won't work) because the KAFKA_DIR isn't defined unless you run install_kafka function) - so fix that somehow
-
-
-
-
-
+    install_web_service
+    launch_web_service
 
-    # install_node
-    # launch_bullet_ui
+    install_node
+    launch_bullet_ui
 
-    # println "All components launched! Visit http://localhost:8800 (default) for the UI"
-    # unset_all
+    println "All components launched! Visit http://localhost:8800 (default) for the UI"
+    unset_all
 }
 
 clean() {
```

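Because the script is built entirely from functions, the steps can also be run one at a time; a sketch, assuming a local copy of the file:

```bash
# Hypothetical interactive use; normally the script runs end-to-end.
source ./install-all-spark.sh   # defines the functions (and turns on set -euo pipefail)
export_vars                     # exports BULLET_HOME, KAFKA_DIR, SPARK_DIR, and the rest
launch                          # or invoke individual steps: install_kafka, launch_kafka, ...
```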