
Commit 11deed0

Updating Quickstart with new fields and adding Spark one-liner

1 parent: 75fa088

File tree

9 files changed: +474, -187 lines

docs/quick-start/spark.md

Lines changed: 321 additions & 42 deletions
Large diffs are not rendered by default.

docs/quick-start/storm.md

Lines changed: 41 additions & 37 deletions
````diff
@@ -43,7 +43,7 @@ mkdir -p $BULLET_HOME/backend/storm
 mkdir -p $BULLET_HOME/service
 mkdir -p $BULLET_HOME/ui
 cd $BULLET_HOME
-curl -LO https://github.com/bullet-db/bullet-db.github.io/releases/download/src/examples_artifacts.tar.gz
+curl -LO https://github.com/bullet-db/bullet-db.github.io/releases/download/v0.5.2/examples_artifacts.tar.gz
 tar -xzf examples_artifacts.tar.gz
 export BULLET_EXAMPLES=$BULLET_HOME/bullet-examples
 ```
````
````diff
@@ -125,7 +125,7 @@ Visit the UI and see if the topology is up. You should see the ```DataSource```
 
 !!! note "Where is this data coming from?"
 
-    This data is randomly generated by the [custom Storm spout](https://github.com/bullet-db/bullet-db.github.io/blob/src/examples/storm/src/main/java/com/yahoo/bullet/storm/examples/RandomSpout.java) that is in the example topology you just launched. In practice, your spout would read from an actual data source such as Kafka instead. See [below](#storm-topology) for more details about this random data spout.
+    This data is randomly generated by the [custom Storm spout](https://github.com/bullet-db/bullet-db.github.io/blob/src/examples/storm/src/main/java/com/yahoo/bullet/storm/examples/RandomSpout.java) that is in the example topology you just launched. In practice, your spout would read from an actual data source such as Kafka. See [below](#storm-topology) for more details about this random data spout.
 
 ### Setting up the Bullet Web Service
 
````
````diff
@@ -271,43 +271,47 @@ This method above emits the tuples. The Storm framework calls this method. This
 When the spout emits the randomly generated tuple, it attaches a ```DUMMY_ID``` to it. In Storm terms, this is a message ID. By adding a message ID, this tuple can be made to flow reliably. The Bullet component that receives this tuple (the Filter bolt) acknowledges or "acks" it. If the tuple does not make it to the Filter bolt within a configured timeout window, Storm will call the ```fail(Object messageId)``` method on the spout. This particular spout does not define one, hence the ```DUMMY_ID```. If your source of data can identify records uniquely and you can re-emit them on a fail, you should attach that actual ID in place of the ```DUMMY_ID```.
 
 ```java
-private BulletRecord generateRecord() {
-    BulletRecord record = new BulletRecord();
-    String uuid = UUID.randomUUID().toString();
-
-    record.setString(STRING, uuid);
-    record.setLong(LONG, (long) generatedThisPeriod);
-    record.setDouble(DOUBLE, random.nextDouble());
-    record.setString(TYPE, STRING_POOL[random.nextInt(STRING_POOL.length)]);
-    record.setLong(DURATION, System.currentTimeMillis() % INTEGER_POOL[random.nextInt(INTEGER_POOL.length)]);
-
-    Map<String, Boolean> booleanMap = new HashMap<>(4);
-    booleanMap.put(uuid.substring(0, 8), random.nextBoolean());
-    booleanMap.put(uuid.substring(9, 13), random.nextBoolean());
-    booleanMap.put(uuid.substring(14, 18), random.nextBoolean());
-    booleanMap.put(uuid.substring(19, 23), random.nextBoolean());
-    record.setBooleanMap(BOOLEAN_MAP, booleanMap);
-
-    Map<String, Long> statsMap = new HashMap<>(4);
-    statsMap.put(PERIOD_COUNT, periodCount);
-    statsMap.put(RECORD_NUMBER, periodCount * maxPerPeriod + generatedThisPeriod);
-    statsMap.put(NANO_TIME, System.nanoTime());
-    statsMap.put(TIMESTAMP, System.currentTimeMillis());
-    record.setLongMap(STATS_MAP, statsMap);
-
-    Map<String, String> randomMapA = new HashMap<>(2);
-    Map<String, String> randomMapB = new HashMap<>(2);
-    randomMapA.put(RANDOM_MAP_KEY_A, STRING_POOL[random.nextInt(STRING_POOL.length)]);
-    randomMapA.put(RANDOM_MAP_KEY_B, STRING_POOL[random.nextInt(STRING_POOL.length)]);
-    randomMapB.put(RANDOM_MAP_KEY_A, STRING_POOL[random.nextInt(STRING_POOL.length)]);
-    randomMapB.put(RANDOM_MAP_KEY_B, STRING_POOL[random.nextInt(STRING_POOL.length)]);
-    record.setListOfStringMap(LIST, asList(randomMapA, randomMapB));
-
-    return record;
-}
+private Map<String, String> makeRandomMap() {
+    Map<String, String> randomMap = new HashMap<>(2);
+    randomMap.put(RANDOM_MAP_KEY_A, STRING_POOL[random.nextInt(STRING_POOL.length)]);
+    randomMap.put(RANDOM_MAP_KEY_B, STRING_POOL[random.nextInt(STRING_POOL.length)]);
+    return randomMap;
+}
+
+private BulletRecord generateRecord() {
+    BulletRecord record = new AvroBulletRecord();
+    String uuid = UUID.randomUUID().toString();
+
+    record.setString(STRING, uuid);
+    record.setLong(LONG, (long) generatedThisPeriod);
+    record.setDouble(DOUBLE, random.nextDouble());
+    record.setDouble(GAUSSIAN, random.nextGaussian());
+    record.setString(TYPE, STRING_POOL[random.nextInt(STRING_POOL.length)]);
+    record.setLong(DURATION, System.currentTimeMillis() % INTEGER_POOL[random.nextInt(INTEGER_POOL.length)]);
+
+    record.setStringMap(SUBTYPES_MAP, makeRandomMap());
+
+    Map<String, Boolean> booleanMap = new HashMap<>(4);
+    booleanMap.put(uuid.substring(0, 8), random.nextBoolean());
+    booleanMap.put(uuid.substring(9, 13), random.nextBoolean());
+    booleanMap.put(uuid.substring(14, 18), random.nextBoolean());
+    booleanMap.put(uuid.substring(19, 23), random.nextBoolean());
+    record.setBooleanMap(BOOLEAN_MAP, booleanMap);
+
+    Map<String, Long> statsMap = new HashMap<>(4);
+    statsMap.put(PERIOD_COUNT, periodCount);
+    statsMap.put(RECORD_NUMBER, periodCount * maxPerPeriod + generatedThisPeriod);
+    statsMap.put(NANO_TIME, System.nanoTime());
+    statsMap.put(TIMESTAMP, System.currentTimeMillis());
+    record.setLongMap(STATS_MAP, statsMap);
+
+    record.setListOfStringMap(LIST, asList(makeRandomMap(), makeRandomMap()));
+
+    return record;
+}
 ```
 
-This method generates some fields randomly and inserts them into a BulletRecord. Note that the BulletRecord is typed and all data must be inserted with the proper types.
+This ```generateRecord``` method generates some fields randomly and inserts them into a BulletRecord. Note that the BulletRecord is typed and all data must be inserted with the proper types.
 
 If you put Bullet on your data, you will need to write a spout (or a topology if your reading is complex) that reads from your data source and emits BulletRecords, placing the fields you wish to be query-able into the record, similar to this example.
 
````

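The message-ID discussion in the hunk above is the heart of reliable delivery, so a concrete sketch may help: a spout that keys each tuple by a real record ID, remembers what it emitted, and re-emits on `fail`. This is illustration only, not part of this commit; `SourceEvent` and `readFromSource` are hypothetical stand-ins for your actual source (a Kafka consumer, say):

```java
// Illustrative sketch only, NOT part of this commit or the Bullet examples.
// SourceEvent and readFromSource() are hypothetical stand-ins for your real source.
import java.util.HashMap;
import java.util.Map;

import com.yahoo.bullet.record.BulletRecord;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

public abstract class ReliableRecordSpout extends BaseRichSpout {
    // Hypothetical: one event from your source (e.g., a single Kafka message).
    public static final class SourceEvent {
        final String id;            // unique, so it can double as the Storm message ID
        final BulletRecord record;  // fields already inserted with the proper types
        public SourceEvent(String id, BulletRecord record) { this.id = id; this.record = record; }
    }

    private SpoutOutputCollector collector;
    // Emitted records awaiting an ack from the Filter bolt, keyed by their real ID.
    private Map<Object, BulletRecord> pending;

    // Hypothetical: poll your data source; return null if nothing is available.
    protected abstract SourceEvent readFromSource();

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        this.pending = new HashMap<>();
    }

    @Override
    public void nextTuple() {
        SourceEvent event = readFromSource();
        if (event == null) {
            return;
        }
        pending.put(event.id, event.record);
        // A real message ID (instead of DUMMY_ID) makes this tuple flow reliably.
        collector.emit(new Values(event.record), event.id);
    }

    @Override
    public void ack(Object messageId) {
        pending.remove(messageId);  // the Filter bolt got it; forget the record
    }

    @Override
    public void fail(Object messageId) {
        BulletRecord record = pending.get(messageId);
        if (record != null) {
            collector.emit(new Values(record), messageId);  // re-emit on timeout/failure
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("record"));
    }
}
```

A production spout would additionally bound the `pending` map and give up on records that fail repeatedly.
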
docs/ui/usage.md

Lines changed: 2 additions & 2 deletions
```diff
@@ -2,10 +2,10 @@
 
 The UI should (hopefully) be self-explanatory. Any particular section that requires additional information has the ![info](../img/info.png) icon next to it. Clicking this will display information relevant to that section.
 
-The interactions in this page are running on the topology that was set up in the [Quick Start on Storm](../quick-start/storm.md). Recall that the example backend is configured to produce *20 data records every 101 ms*.
+The interactions in this page are running on the topology that was set up in the [Quick Start on Storm](../quick-start/storm.md). Recall that that example backend is configured to produce *20 data records every 101 ms*.
 
 !!! note "NOTE: Some of these videos use an old version of the Bullet UI"
-    We are currently in progress adding new videos with windowing, etc.
+    We are currently adding new videos that cover windowing and the other new features in the latest UI.
 
 ## Landing page
 
```

examples/install-all-spark.sh

Lines changed: 61 additions & 79 deletions
```diff
@@ -2,9 +2,9 @@
 
 set -euo pipefail
 
-BULLET_EXAMPLES_VERSION=0.4.0
-BULLET_UI_VERSION=0.4.0
-BULLET_WS_VERSION=0.2.1
+BULLET_EXAMPLES_VERSION=0.5.1
+BULLET_UI_VERSION=0.5.0
+BULLET_WS_VERSION=0.3.0
 BULLET_KAFKA_VERSION=0.3.0
 KAFKA_VERSION=0.11.0.1
 SPARK_VERSION=2.2.1
```
```diff
@@ -25,6 +25,8 @@ print_versions() {
     println "Bullet Examples: ${BULLET_EXAMPLES_VERSION}"
     println "Bullet Web Service: ${BULLET_WS_VERSION}"
     println "Bullet UI: ${BULLET_UI_VERSION}"
+    println "Bullet Kafka: ${BULLET_KAFKA_VERSION}"
+    println "Spark: ${SPARK_VERSION}"
     println "Kafka: ${KAFKA_VERSION}"
     println "NVM: ${NVM_VERSION}"
     println "Node.js: ${NODE_VERSION}"
```
```diff
@@ -51,9 +53,13 @@ export_vars() {
 
     println "Exporting some variables..."
     export BULLET_HOME="${PWD}/bullet-quickstart"
-    export BULLET_EXAMPLES=$BULLET_HOME/bullet-examples
-    export BULLET_DOWNLOADS=$BULLET_HOME/bullet-downloads
-    export BULLET_SPARK=${BULLET_HOME}/backend/spark
+    export BULLET_EXAMPLES="$BULLET_HOME/bullet-examples"
+    export BULLET_DOWNLOADS="$BULLET_HOME/bullet-downloads"
+    export BULLET_SPARK="${BULLET_HOME}/backend/spark"
+    export KAFKA_DISTRO="kafka_2.12-${KAFKA_VERSION}"
+    export KAFKA_DIR="${BULLET_HOME}/pubsub"
+    export SPARK_DISTRO="spark-${SPARK_VERSION}-bin-hadoop2.7"
+    export SPARK_DIR="${BULLET_SPARK}/${SPARK_DISTRO}"
     println "Done!"
 }
 
```

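Taken together, the exports above imply roughly this layout under `$BULLET_HOME` once the script has run (a sketch inferred from the variables, not output of the script):

```
bullet-quickstart/
├── backend/spark/      # BULLET_SPARK; the Spark distro unpacks to SPARK_DIR inside
├── bullet-downloads/   # BULLET_DOWNLOADS: all fetched tarballs and jars
├── bullet-examples/    # BULLET_EXAMPLES
├── pubsub/             # KAFKA_DIR: the Kafka distro plus the bullet-kafka fat jar
├── service/            # the Bullet Web Service
└── ui/                 # the Bullet UI
```
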
```diff
@@ -77,51 +83,48 @@ install_bullet_examples() {
 }
 
 install_kafka() {
-    local KAFKA="kafka_2.12-${KAFKA_VERSION}"
-    local PUBSUB="${BULLET_HOME}/pubsub/"
-
     println "Downloading Kafka ${KAFKA_VERSION}..."
-    download "https://archive.apache.org/dist/kafka/${KAFKA_VERSION}" "${KAFKA}.tgz"
+    download "https://archive.apache.org/dist/kafka/${KAFKA_VERSION}" "${KAFKA_DISTRO}.tgz"
 
-    println "Installing Kafka ..."
-    tar -xzf ${BULLET_DOWNLOADS}/${KAFKA}.tgz -C ${PUBSUB}
-    export KAFKA_DIR=${PUBSUB}${KAFKA}
+    println "Installing Kafka to ${KAFKA_DIR}..."
+    tar -xzf ${BULLET_DOWNLOADS}/${KAFKA_DISTRO}.tgz -C ${KAFKA_DIR}
 
     println "Done!"
 }
 
 install_bullet_kafka() {
     local BULLET_KAFKA="bullet-kafka-${BULLET_KAFKA_VERSION}-fat.jar"
-    local PUBSUB="${BULLET_HOME}/pubsub/"
 
     println "Downloading bullet-kafka ${BULLET_KAFKA_VERSION}..."
     download "http://jcenter.bintray.com/com/yahoo/bullet/bullet-kafka/${BULLET_KAFKA_VERSION}" "${BULLET_KAFKA}"
-    cp ${BULLET_DOWNLOADS}/${BULLET_KAFKA} ${PUBSUB}${BULLET_KAFKA}
-    export BULLET_KAFKA_JAR=${PUBSUB}${BULLET_KAFKA}
+    cp ${BULLET_DOWNLOADS}/${BULLET_KAFKA} ${BULLET_HOME}/pubsub/${BULLET_KAFKA}
 
     println "Done!"
 }
 
 launch_kafka() {
+    local KAFKA_DIR=${KAFKA_DIR}/${KAFKA_DISTRO}
     println "Launching Zookeeper..."
     $KAFKA_DIR/bin/zookeeper-server-start.sh $KAFKA_DIR/config/zookeeper.properties &
-    sleep 3
+    println "Sleeping for 10s to ensure Zookeeper is up..."
+    sleep 10
 
     println "Launching Kafka..."
     $KAFKA_DIR/bin/kafka-server-start.sh $KAFKA_DIR/config/server.properties &
-
-    sleep 3
+    println "Sleeping for 10s to ensure Kafka is up..."
+    sleep 10
     println "Done!"
 }
 
 create_topics() {
+    local KAFKA_DIR=${KAFKA_DIR}/${KAFKA_DISTRO}
     set +e
-    println "Creating kafka topics ${KAFKA_TOPIC_REQUESTS} and ${KAFKA_TOPIC_RESPONSES}..."
+    println "Creating Kafka topics ${KAFKA_TOPIC_REQUESTS} and ${KAFKA_TOPIC_RESPONSES}..."
     $KAFKA_DIR/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic ${KAFKA_TOPIC_REQUESTS}
     $KAFKA_DIR/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic ${KAFKA_TOPIC_RESPONSES}
     set -e
 
-    sleep 3
+    println "Sleeping for 10s to ensure Kafka topics are created..."
+    sleep 10
     println "Done!"
 }
 
```
```diff
@@ -145,62 +148,57 @@ launch_web_service() {
 
     println "Launching Bullet Web Service..."
     cd "${BULLET_SERVICE_HOME}"
-    java -Dloader.path=${BULLET_KAFKA_JAR} -jar ${BULLET_WS_JAR} \
+    java -Dloader.path=${BULLET_HOME}/pubsub/bullet-kafka-${BULLET_KAFKA_VERSION}-fat.jar -jar ${BULLET_WS_JAR} \
         --bullet.pubsub.config=${BULLET_SERVICE_HOME}/example_kafka_pubsub_config.yaml \
         --bullet.schema.file=${BULLET_SERVICE_HOME}/example_columns.json \
         --server.port=9999 \
-        --logging.path=. \
-        --logging.file=log.txt &> log.txt &
+        --logging.path=${BULLET_SERVICE_HOME} \
+        --logging.file=log.txt &> ${BULLET_SERVICE_HOME}/log.txt &
 
     println "Sleeping for 15 s to ensure Bullet Web Service is up..."
     sleep 15
 
-    println "Testing the Web Service: Getting column schema..."
+    println "Getting one random record from Bullet through the Web Service..."
+    println "curl -s -H 'Content-Type: text/plain' -X POST -d '{\"aggregation\": {\"size\": 1}}' http://localhost:9999/api/bullet/sse-query"
+    println ""
+    println "Getting column schema from the Web Service..."
     println ""
     curl -s http://localhost:9999/api/bullet/columns
-    println "Finished Bullet Web Service test"
+    println "Finished Bullet Web Service test!"
 }
 
 install_spark() {
-    local SPARK="spark-${SPARK_VERSION}-bin-hadoop2.7.tgz"
-
     println "Downloading Spark version ${SPARK_VERSION}..."
-    download "http://www-us.apache.org/dist/spark/spark-${SPARK_VERSION}" "${SPARK}"
-
-    println "Installing Spark version ${SPARK_VERSION}..."
-    cp ${BULLET_DOWNLOADS}/${SPARK} ${BULLET_HOME}/backend/spark/
-
-    tar -xzf "${BULLET_HOME}/backend/spark/${SPARK}" -C "${BULLET_HOME}/backend/spark/"
-    export SPARK_DIR="${BULLET_HOME}/backend/spark/spark-${SPARK_VERSION}-bin-hadoop2.7"
+    download "http://www-us.apache.org/dist/spark/spark-${SPARK_VERSION}" "${SPARK_DISTRO}.tgz"
 
+    println "Installing Spark version ${SPARK_VERSION}..."
+    cp ${BULLET_DOWNLOADS}/${SPARK_DISTRO}.tgz ${BULLET_SPARK}/
+    tar -xzf "${BULLET_SPARK}/${SPARK_DISTRO}.tgz" -C ${BULLET_SPARK}
     println "Done!"
 }
 
-install_bullet_spark() {
-    cp $BULLET_HOME/bullet-examples/backend/spark/* $BULLET_SPARK
-    # Remove this 88 - THIS STILL NEEDS to be implemented - download the thing (it's not available online yet because we haven't released this version yet):
-    # Something like this: curl -Lo bullet-spark.jar http://jcenter.bintray.com/com/yahoo/bullet/bullet-spark/0.1.1/bullet-spark-0.1.1-standalone.jar
-}
-
 launch_bullet_spark() {
+    local BULLET_KAFKA_JAR=bullet-kafka-${BULLET_KAFKA_VERSION}-fat.jar
+
+    println "Copying Bullet Spark configuration and artifacts..."
+    cp $BULLET_HOME/bullet-examples/backend/spark/* $BULLET_SPARK
     cd ${BULLET_SPARK}
-    println "Launching bullet-spark..."
+    println "Launching Bullet Spark..."
+    println "=============================================================================="
     ${SPARK_DIR}/bin/spark-submit \
         --master local[10] \
         --class com.yahoo.bullet.spark.BulletSparkStreamingMain \
-        --driver-class-path $BULLET_SPARK/bullet-spark.jar:${BULLET_KAFKA_JAR}:$BULLET_SPARK/bullet-spark-example.jar \
+        --driver-class-path $BULLET_SPARK/bullet-spark.jar:${BULLET_HOME}/pubsub/${BULLET_KAFKA_JAR}:$BULLET_SPARK/bullet-spark-example.jar \
         $BULLET_SPARK/bullet-spark.jar \
         --bullet-spark-conf=$BULLET_SPARK/bullet_spark_settings.yaml &> log.txt &
 
-    println "Sleeping for 15 s to ensure bullet-spark is up and running..."
+    println "Sleeping for 15 s to ensure Bullet Spark is up and running..."
+    println "=============================================================================="
     sleep 15
 
-    println "Done! You should now be able to query Bullet through the web service. Try this:"
-    println "curl -s -H 'Content-Type: text/plain' -X POST -d '{\"aggregation\": {\"size\": 1}}' http://localhost:9999/api/bullet/sse-query"
+    println "Done!"
 }
 
-
-
 install_node() {
     # NVM unset var bug
     set +u
```
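With the backend and the Web Service both up, the end-to-end check is the query the script itself prints; the command below is taken verbatim from the `println` above:

```bash
# Ask Bullet for one random record through the Web Service.
curl -s -H 'Content-Type: text/plain' -X POST \
     -d '{"aggregation": {"size": 1}}' \
     http://localhost:9999/api/bullet/sse-query
```
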
```diff
@@ -250,18 +248,19 @@
 }
 
 cleanup() {
+    local KAFKA_INSTALL_DIR=${KAFKA_DIR}/${KAFKA_DISTRO}
     set +e
 
     pkill -f "[e]xpress-server.js"
     pkill -f "[e]xample_kafka_pubsub_config.yaml"
     pkill -f "[b]ullet-spark"
-    ${KAFKA_DIR}/bin/kafka-server-stop.sh
-    ${KAFKA_DIR}/bin/zookeeper-server-stop.sh
+    ${KAFKA_INSTALL_DIR}/bin/kafka-server-stop.sh
+    ${KAFKA_INSTALL_DIR}/bin/zookeeper-server-stop.sh
 
     sleep 3
 
     rm -rf "${BULLET_EXAMPLES}" "${BULLET_HOME}/backend" "${BULLET_HOME}/service" \
-           "${BULLET_HOME}/ui" "${BULLET_HOME}/pubsub" /tmp/dev-storm-zookeeper
+           "${BULLET_HOME}/ui" "${BULLET_HOME}/pubsub"
 
     set -e
 }
```
```diff
@@ -275,10 +274,11 @@ teardown() {
 unset_all() {
     unset -f print_versions println download export_vars setup \
             install_bullet_examples \
-            install_storm launch_storm launch_bullet_storm \
-            launch_bullet_web_service \
+            install_kafka install_bullet_kafka launch_kafka create_topics \
+            install_spark launch_bullet_spark \
+            install_web_service launch_web_service \
             install_node launch_bullet_ui \
-            cleanup teardown unset_all launch
+            cleanup teardown unset_all launch clean
 }
 
 launch() {
```
```diff
@@ -288,42 +288,24 @@
     teardown
 
     setup
-
-    # install_bullet_examples
-    # <------------- Remove this 88 - the above line needs to be uncommented and all the below stuff should be removed once this artifact actualy exists on the git cloud or whatever
-    cp ~/bullet/bullet-db.github.io/examples/examples_artifacts.tar.gz ${BULLET_DOWNLOADS}/
-    tar -xzf "${BULLET_DOWNLOADS}/examples_artifacts.tar.gz" -C "${BULLET_HOME}" # <------------ Remove this 88 - remove this line and the one above it once the artifact is actulaly on github
+    install_bullet_examples
 
     install_kafka
     install_bullet_kafka
     launch_kafka
     create_topics
 
-    install_web_service
-    launch_web_service
-
     install_spark
-    # install_bullet_spark
-    # <------------- Remove this 88 - the above line needs to be uncommented and all the below stuff should be removed once this artifact actualy exists on the git cloud or whatever
-    cp $BULLET_HOME/bullet-examples/backend/spark/* $BULLET_SPARK # <------------ Remove this 88
-    cp ~/bullet/bullet-spark/target/bullet-spark-0.1.1-SNAPSHOT-standalone.jar $BULLET_SPARK/bullet-spark.jar # <------------ Remove this 88
-
     launch_bullet_spark
 
-    # Remove this 88 - deal with the following two lines:
-    # Now do the UI stuff once the new UI is ready
-    # ALSO - DON'T FORGET! The teardown stuff doesn't work unless you run the whole script (the "else" block at the bottom won't work) because the KAFKA_DIR isn't defined unless you run install_kafka function) - so fix that somehow
-
-
-
-
-
+    install_web_service
+    launch_web_service
 
-    # install_node
-    # launch_bullet_ui
+    install_node
+    launch_bullet_ui
 
-    # println "All components launched! Visit http://localhost:8800 (default) for the UI"
-    # unset_all
+    println "All components launched! Visit http://localhost:8800 (default) for the UI"
+    unset_all
 }
 
 clean() {
```

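Because the script is built entirely from functions, the steps can also be run one at a time; a sketch, assuming a local copy of the file:

```bash
# Hypothetical interactive use; normally the script runs end-to-end.
source ./install-all-spark.sh   # defines the functions (and turns on set -euo pipefail)
export_vars                     # exports BULLET_HOME, KAFKA_DIR, SPARK_DIR, and the rest
launch                          # or invoke individual steps: install_kafka, launch_kafka, ...
```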