|
########################################################################################################################
-######################################### Bullet Spark Settings #####################################
+############################################### Bullet Spark defaults #################################################
########################################################################################################################
+# This is the name of the concrete implementation of Data Producer to use.
bullet.spark.data.producer.class.name: "com.yahoo.bullet.spark.examples.RandomProducer"
+
+# This is the batch interval of your Spark Streaming job. Find out more at
+# https://spark.apache.org/docs/latest/streaming-programming-guide.html#setting-the-right-batch-interval.
bullet.spark.batch.duration.ms: 1000
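+# For example, the default of 1000 ms means Spark Streaming cuts one micro-batch per second; a larger interval
+# generally trades result latency for lower scheduling overhead.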
+
+# This is the size of the buffer for accumulating queries in the Query Receiver before emitting to Spark.
bullet.spark.receiver.query.block.size: 1
+
+# This is the maximum number of partitions that will be created by the Query Receiver.
bullet.spark.receiver.query.coalesce.partitions: 10
+
+# This is the number of Data Producers.
bullet.spark.data.producer.parallelism: 1
+
+# This is the checkpoint directory. If you are running Spark on a cluster, the directory must be an HDFS path.
bullet.spark.checkpoint.dir: "/tmp/spark-checkpoint"
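+# For example, on a cluster this might look like the following (a hypothetical namenode and path; use your own):
+# bullet.spark.checkpoint.dir: "hdfs://your-namenode:8020/user/bullet/spark-checkpoint"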
+
+# If true, Bullet Spark recovers context from checkpoint files when restarting.
+# Otherwise, Bullet Spark creates a new context.
bullet.spark.recover.from.checkpoint.enable: false
+
+# This is the Spark application name.
bullet.spark.app.name: "BulletSparkStreamingJob"
+
+# If true, Bullet Spark collects metrics, which can be accessed via the Spark REST API (/metrics/json).
bullet.spark.metrics.enabled: false
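+# For example, assuming Spark's default UI port, the collected metrics could then be fetched with something like:
+# curl http://<driver-host>:4040/metrics/json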
+
+# If true, enables parallel processing of queries in each partition of the Filter Streaming job. This is particularly
+# useful when using Producers that are Direct (e.g. DirectKafkaProducer) and you would like to avoid repartitioning
+# the data, choosing instead to parallelize within each partition (whose count is fixed by the producer).
+# It speeds up the processing within those partitions by distributing queries across multiple threads that perform
+# the filtering operation concurrently.
bullet.spark.filter.partition.parallel.mode.enabled: false
+
+# This is the thread pool size to use when bullet.spark.filter.partition.parallel.mode.enabled is true.
bullet.spark.filter.partition.parallel.mode.parallelism: 4
+
+# This is the minimum number of queries at which parallel partition filtering is applied. Since there are fixed
+# costs to managing a thread pool, one is only created once the number of queries exceeds this threshold.
+# It is only used when bullet.spark.filter.partition.parallel.mode.enabled is true.
bullet.spark.filter.partition.parallel.mode.min.query.threshold: 10
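+# As an illustration with the defaults above: at a parallelism of 4 and a threshold of 10, a partition holding
+# 25 queries would have its filtering spread across the 4 threads, while a partition holding 8 queries would be
+# processed serially since it does not exceed the threshold.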

+# The following two settings set the checkpoint intervals independently for each stateful transformation.
+# Checkpoint interval = Spark batch duration * checkpoint duration multiplier
+# Use these to control the frequency of the checkpointing operation. If set too high, there might be too much
+# data to checkpoint (a long RDD lineage graph).
+bullet.spark.query.union.checkpoint.duration.multiplier: 10
+bullet.spark.join.checkpoint.duration.multiplier: 10
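+# For example, with the default bullet.spark.batch.duration.ms of 1000 and a multiplier of 10, these streams
+# are checkpointed every 1000 ms * 10 = 10 seconds.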
+
+# The feedback publisher switches your PubSub into QUERY_SUBMISSION mode to loop metadata messages back to the
+# Query Receiver. If the publisher in this mode needs settings that differ from those used by the result
+# publisher, override them here. This setting needs to be a Map if provided.
+# The example below pretends that your PubSub settings start with bullet.pubsub.custom; substitute your own.
+# Example:
+#
+# bullet.spark.loop.pubsub.overrides:
+#   bullet.pubsub.custom.publisher.setting: 1
+#   bullet.pubsub.custom.nested.publisher.setting:
+#     foo: bar
+#     bar: baz
+bullet.spark.loop.pubsub.overrides: {}
+
########################################################################################################################
-######################################### Spark Streaming Settings #####################################
+############################################### Spark Streaming defaults ###############################################
########################################################################################################################
+# The following settings are passed to Spark directly. You can add more settings here.
+# Find out more about configuring a Spark job at https://spark.apache.org/docs/latest/configuration.html.
+# Add configuration that changes infrequently here and pass more variable settings on the command line when
+# submitting the job.
spark.serializer: "org.apache.spark.serializer.KryoSerializer"
spark.closure.serializer: "org.apache.spark.serializer.KryoSerializer"
-spark.kryo.registrator: "com.yahoo.bullet.spark.utils.BulletKryoRegistrator"
spark.streaming.stopGracefullyOnShutdown: "true"
spark.streaming.receiver.writeAheadLog.enable: "false"
spark.streaming.driver.writeAheadLog.allowBatching: "false"
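+# For example, a per-deployment setting like executor memory could instead be passed at submission time
+# (hypothetical values and jar name):
+# spark-submit --conf spark.executor.memory=4g --conf spark.executor.instances=16 ... bullet-spark-example.jar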

########################################################################################################################
-######################################### Query PubSub Settings ########################################
+############################################### Query PubSub defaults ##################################################
########################################################################################################################
+# This is the type of PubSub context to use for the result publisher.
+# The feedback publisher uses QUERY_SUBMISSION since it submits messages.
bullet.pubsub.context.name: "QUERY_PROCESSING"
+# This is the name of the concrete implementation of PubSub to use.
+# By default, it is the built-in REST in-memory PubSub; here it is overridden to use the Kafka PubSub.
bullet.pubsub.class.name: "com.yahoo.bullet.kafka.KafkaPubSub"
+# Add settings specific to your PubSub.
bullet.pubsub.kafka.bootstrap.servers: "localhost:9092"
bullet.pubsub.kafka.request.topic.name: "bullet.requests"
bullet.pubsub.kafka.response.topic.name: "bullet.responses"
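+# For example, pointing at a real Kafka cluster would typically list several brokers (hypothetical hosts):
+# bullet.pubsub.kafka.bootstrap.servers: "kafka-1.example.com:9092,kafka-2.example.com:9092"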
+
+########################################################################################################################
+############################################### Bullet Core settings ###################################################
+########################################################################################################################
+## You can also configure the core Bullet settings here. For documentation and defaults for those settings, refer to:
+## https://github.com/bullet-db/bullet-core/blob/master/src/main/resources/bullet_defaults.yaml
+########################################################################################################################
+########################################################################################################################
+# Factory class to get new BulletRecords.
+bullet.record.provider.class.name: "com.yahoo.bullet.record.SimpleBulletRecordProvider"