|
| 1 | +#!/usr/bin/env python |
| 2 | +# |
| 3 | +# Copyright 2021 Splunk, Inc. |
| 4 | +# |
| 5 | +# Licensed under the Apache License, Version 2.0 (the "License"): you may |
| 6 | +# not use this file except in compliance with the License. You may obtain |
| 7 | +# a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, software |
| 12 | +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 13 | +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 14 | +# License for the specific language governing permissions and limitations |
| 15 | +# under the License. |
| 16 | + |
| 17 | +from __future__ import absolute_import |
| 18 | + |
| 19 | +import os |
| 20 | +import re |
| 21 | +import sys |
| 22 | +import json |
| 23 | +# NOTE: splunklib must exist within github_commits/lib/splunklib for this |
| 24 | +# example to run! To run this locally use `SPLUNK_VERSION=latest docker compose up -d` |
| 25 | +# from the root of this repo which mounts this example and the latest splunklib |
| 26 | +# code together at /opt/splunk/etc/apps/github_commits |
| 27 | +from datetime import datetime |
| 28 | + |
| 29 | +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "lib")) |
| 30 | + |
| 31 | +from splunklib.modularinput import * |
| 32 | +from splunklib import six |
| 33 | +from six.moves import http_client |
| 34 | + |
| 35 | + |
| 36 | +class MyScript(Script): |
| 37 | + """All modular inputs should inherit from the abstract base class Script |
| 38 | + from splunklib.modularinput.script. |
| 39 | + They must override the get_scheme and stream_events functions, and, |
| 40 | + if the scheme returned by get_scheme has Scheme.use_external_validation |
| 41 | + set to True, the validate_input function. |
| 42 | + """ |
| 43 | + |
| 44 | + def get_scheme(self): |
| 45 | + """When Splunk starts, it looks for all the modular inputs defined by |
| 46 | + its configuration, and tries to run them with the argument --scheme. |
| 47 | + Splunkd expects the modular inputs to print a description of the |
| 48 | + input in XML on stdout. The modular input framework takes care of all |
| 49 | + the details of formatting XML and printing it. The user need only |
| 50 | + override get_scheme and return a new Scheme object. |
| 51 | +
|
| 52 | + :return: scheme, a Scheme object |
| 53 | + """ |
| 54 | + # Splunk will display "Github Commits" to users for this input |
| 55 | + scheme = Scheme("Github Commits") |
| 56 | + |
| 57 | + scheme.description = "Streams events of commits in the specified Github repository (must be public, unless setting a token)." |
| 58 | + # If you set external validation to True, without overriding validate_input, |
| 59 | + # the script will accept anything as valid. Generally you only need external |
| 60 | + # validation if there are relationships you must maintain among the |
| 61 | + # parameters, such as requiring min to be less than max in this example, |
| 62 | + # or you need to check that some resource is reachable or valid. |
| 63 | + # Otherwise, Splunk lets you specify a validation string for each argument |
| 64 | + # and will run validation internally using that string. |
| 65 | + scheme.use_external_validation = True |
| 66 | + scheme.use_single_instance = False # Set to false so an input can have an optional interval parameter. |
| 67 | + |
| 68 | + owner_argument = Argument("owner") |
| 69 | + owner_argument.title = "Owner" |
| 70 | + owner_argument.data_type = Argument.data_type_string |
| 71 | + owner_argument.description = "Github user or organization that created the repository." |
| 72 | + owner_argument.required_on_create = True |
| 73 | + # If you are not using external validation, you would add something like: |
| 74 | + # |
| 75 | + # scheme.validation = "owner==splunk" |
| 76 | + scheme.add_argument(owner_argument) |
| 77 | + |
| 78 | + repo_name_argument = Argument("repo_name") |
| 79 | + repo_name_argument.title = "Repo Name" |
| 80 | + repo_name_argument.data_type = Argument.data_type_string |
| 81 | + repo_name_argument.description = "Name of the Github repository." |
| 82 | + repo_name_argument.required_on_create = True |
| 83 | + scheme.add_argument(repo_name_argument) |
| 84 | + |
| 85 | + token_argument = Argument("token") |
| 86 | + token_argument.title = "Token" |
| 87 | + token_argument.data_type = Argument.data_type_string |
| 88 | + token_argument.description = "(Optional) A Github API access token. Required for private repositories (the token must have the 'repo' and 'public_repo' scopes enabled). Recommended to avoid Github's API limit, especially if setting an interval." |
| 89 | + token_argument.required_on_create = False |
| 90 | + token_argument.required_on_edit = False |
| 91 | + scheme.add_argument(token_argument) |
| 92 | + |
| 93 | + return scheme |
| 94 | + |
| 95 | + def validate_input(self, validation_definition): |
| 96 | + """In this example we are using external validation to verify that the Github |
| 97 | + repository exists. If validate_input does not raise an Exception, the input |
| 98 | + is assumed to be valid. Otherwise it prints the exception as an error message |
| 99 | + when telling splunkd that the configuration is invalid. |
| 100 | +
|
| 101 | + When using external validation, after splunkd calls the modular input with |
| 102 | + --scheme to get a scheme, it calls it again with --validate-arguments for |
| 103 | + each instance of the modular input in its configuration files, feeding XML |
| 104 | + on stdin to the modular input to do validation. It is called the same way |
| 105 | + whenever a modular input's configuration is edited. |
| 106 | +
|
| 107 | + :param validation_definition: a ValidationDefinition object |
| 108 | + """ |
| 109 | + # Get the values of the parameters, and construct a URL for the Github API |
| 110 | + |
| 111 | + owner = validation_definition.parameters["owner"] |
| 112 | + repo_name = validation_definition.parameters["repo_name"] |
| 113 | + token = None |
| 114 | + if "token" in validation_definition.parameters: |
| 115 | + token = validation_definition.parameters["token"] |
| 116 | + |
| 117 | + # Call Github to retrieve repo information |
| 118 | + res = _get_github_commits(owner, repo_name, 1, 1, token) |
| 119 | + |
| 120 | + # If we get any kind of message, that's a bad sign. |
| 121 | + if "message" in res: |
| 122 | + raise ValueError("Some error occur during fetching commits. - " + res["message"]) |
| 123 | + elif len(res) == 1 and "sha" in res[0]: |
| 124 | + pass |
| 125 | + else: |
| 126 | + raise ValueError("Expected only the latest commit, instead found " + str(len(res)) + " commits.") |
| 127 | + |
| 128 | + def stream_events(self, inputs, ew): |
| 129 | + """This function handles all the action: splunk calls this modular input |
| 130 | + without arguments, streams XML describing the inputs to stdin, and waits |
| 131 | + for XML on stdout describing events. |
| 132 | +
|
| 133 | + If you set use_single_instance to True on the scheme in get_scheme, it |
| 134 | + will pass all the instances of this input to a single instance of this |
| 135 | + script. |
| 136 | +
|
| 137 | + :param inputs: an InputDefinition object |
| 138 | + :param ew: an EventWriter object |
| 139 | + """ |
| 140 | + |
| 141 | + # Go through each input for this modular input |
| 142 | + for input_name, input_item in six.iteritems(inputs.inputs): |
| 143 | + # Get fields from the InputDefinition object |
| 144 | + owner = input_item["owner"] |
| 145 | + repo_name = input_item["repo_name"] |
| 146 | + token = None |
| 147 | + if "token" in input_item: |
| 148 | + token = input_item["token"] |
| 149 | + # Get the checkpoint directory out of the modular input's metadata |
| 150 | + checkpoint_dir = inputs.metadata["checkpoint_dir"] |
| 151 | + |
| 152 | + checkpoint_file_path = os.path.join(checkpoint_dir, owner + "_" + repo_name + ".txt") |
| 153 | + checkpoint_file_new_contents = "" |
| 154 | + error_found = False |
| 155 | + |
| 156 | + # Set the temporary contents of the checkpoint file to an empty string |
| 157 | + checkpoint_file_contents = "" |
| 158 | + |
| 159 | + try: |
| 160 | + # read sha values from file, if exist |
| 161 | + file = open(checkpoint_file_path, 'r') |
| 162 | + checkpoint_file_contents = file.read() |
| 163 | + file.close() |
| 164 | + except: |
| 165 | + # If there's an exception, assume the file doesn't exist |
| 166 | + # Create the checkpoint file with an empty string |
| 167 | + file = open(checkpoint_file_path, "a") |
| 168 | + file.write("") |
| 169 | + file.close() |
| 170 | + |
| 171 | + per_page = 100 # The maximum per page value supported by the Github API. |
| 172 | + page = 1 |
| 173 | + |
| 174 | + while True: |
| 175 | + # Get the commit count from the Github API |
| 176 | + res = _get_github_commits(owner, repo_name, per_page, page, token) |
| 177 | + if len(res) == 0: |
| 178 | + break |
| 179 | + |
| 180 | + file = open(checkpoint_file_path, "a") |
| 181 | + |
| 182 | + for record in res: |
| 183 | + if error_found: |
| 184 | + break |
| 185 | + |
| 186 | + # If the file exists and doesn't contain the sha, or if the file doesn't exist. |
| 187 | + if checkpoint_file_contents.find(record["sha"] + "\n") < 0: |
| 188 | + try: |
| 189 | + _stream_commit(ew, owner, repo_name, record) |
| 190 | + # Append this commit to the string we'll write at the end |
| 191 | + checkpoint_file_new_contents += record["sha"] + "\n" |
| 192 | + except: |
| 193 | + error_found = True |
| 194 | + file.write(checkpoint_file_new_contents) |
| 195 | + |
| 196 | + # We had an error, die. |
| 197 | + return |
| 198 | + |
| 199 | + file.write(checkpoint_file_new_contents) |
| 200 | + file.close() |
| 201 | + |
| 202 | + page += 1 |
| 203 | + |
| 204 | + |
| 205 | +def _get_display_date(date): |
| 206 | + month_strings = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] |
| 207 | + date_format = "%Y-%m-%d %H:%M:%S" |
| 208 | + date = datetime.strptime(date, date_format) |
| 209 | + |
| 210 | + hours = date.hour |
| 211 | + if hours < 10: |
| 212 | + hours = "0" + str(hours) |
| 213 | + |
| 214 | + mins = date.minute |
| 215 | + if mins < 10: |
| 216 | + mins = "0" + str(mins) |
| 217 | + |
| 218 | + return "{month} {day}, {year} - {hour}:{minute} {period}".format(month=month_strings[date.month - 1], day=date.day, |
| 219 | + year=date.year, hour=hours, minute=mins, |
| 220 | + period="AM" if date.hour < 12 else "PM") |
| 221 | + |
| 222 | + |
| 223 | +def _get_github_commits(owner, repo_name, per_page=1, page=1, token=None): |
| 224 | + # Read the response from the Github API, then parse the JSON data into an object |
| 225 | + repo_path = "/repos/%s/%s/commits?per_page=%d&page=%d" % (owner, repo_name, per_page, page) |
| 226 | + connection = http_client.HTTPSConnection('api.github.com') |
| 227 | + headers = { |
| 228 | + 'Content-type': 'application/json', |
| 229 | + 'User-Agent': 'splunk-sdk-python' |
| 230 | + } |
| 231 | + if token: |
| 232 | + headers['Authorization'] = 'token ' + token |
| 233 | + connection.request('GET', repo_path, headers=headers) |
| 234 | + response = connection.getresponse() |
| 235 | + body = response.read().decode() |
| 236 | + return json.loads(body) |
| 237 | + |
| 238 | + |
| 239 | +def _stream_commit(ew, owner, repo_name, commitData): |
| 240 | + json_data = { |
| 241 | + "sha": commitData["sha"], |
| 242 | + "api_url": commitData["url"], |
| 243 | + "url": "https://github.com/" + owner + "/" + repo_name + "/commit/" + commitData["sha"] |
| 244 | + } |
| 245 | + commit = commitData["commit"] |
| 246 | + |
| 247 | + # At this point, assumed checkpoint doesn't exist. |
| 248 | + json_data["message"] = re.sub("\n|\r", " ", commit["message"]) |
| 249 | + json_data["author"] = commit["author"]["name"] |
| 250 | + json_data["rawdate"] = commit["author"]["date"] |
| 251 | + commit_date = re.sub("T|Z", " ", commit["author"]["date"]).strip() |
| 252 | + json_data["displaydate"] = _get_display_date(commit_date) |
| 253 | + |
| 254 | + # Create an Event object, and set its fields |
| 255 | + event = Event() |
| 256 | + event.stanza = repo_name |
| 257 | + event.sourceType = "github_commits" |
| 258 | + event.data = json.dumps(json_data) |
| 259 | + |
| 260 | + # Tell the EventWriter to write this event |
| 261 | + ew.write_event(event) |
| 262 | + |
| 263 | + |
| 264 | +if __name__ == "__main__": |
| 265 | + sys.exit(MyScript().run(sys.argv)) |
0 commit comments