|
| 1 | +``` |
| 2 | +# Download logs from S3 bucket |
| 3 | +aws s3 sync s3://<log-bucket> /tmp/rubyforge-logs/ |
| 4 | +
|
| 5 | +# Combine logs into a single file |
| 6 | +cd /tmp |
| 7 | +cat rubyforge-logs/*.log > rubyforge.org.log |
| 8 | +
|
| 9 | +# Each line is a JSON object representing one log entry |
| 10 | +$ tail -n1 rubyforge.org.log | json_pp |
| 11 | +{ |
| 12 | + "fastly_is_edge" : true, |
| 13 | + "fastly_server" : "cache-bfi-kbfi7400031-BFI", |
| 14 | + "geo_city" : "the dalles", |
| 15 | + "geo_country" : "united states", |
| 16 | + "host" : "rubyforge.org", |
| 17 | + "request_method" : "GET", |
| 18 | + "request_protocol" : "HTTP/1.1", |
| 19 | + "request_referer" : "", |
| 20 | + "request_user_agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36", |
| 21 | + "response_body_size" : 826, |
| 22 | + "response_reason" : "Not Found", |
| 23 | + "response_state" : "MISS-CLUSTER", |
| 24 | + "response_status" : 404, |
| 25 | + "timestamp" : "2025-05-09T09:13:14+0000", |
| 26 | + "url" : "/pipermail/tzinfo-users/2012-November/000121.html" |
| 27 | +} |
| 28 | +
|
| 29 | +# Convert JSON to CSV |
| 30 | +cat rubyforge.org.log | \ |
| 31 | +ruby -rcsv -rjson -e "ARGF.each.with_index { |line, index| d = JSON.parse(line); puts d.keys.to_csv if index == 0; puts d.values.to_csv }" \ |
| 32 | +> rubyforge.org.log.csv |
| 33 | +
|
| 34 | +# Import the CSV into SQLite (sqlite3 started without a filename uses a transient in-memory database) |
| 35 | +sqlite3 |
| 36 | +sqlite> .import rubyforge.org.log.csv logs --csv |
| 37 | +
|
| 38 | +# Strip the trailing +0000 UTC offset from timestamp strings so that SQLite's date and time functions can parse them |
| 39 | +sqlite> update logs set timestamp = replace(timestamp, '+0000', ''); |
| 40 | +
|
| 41 | +# Count the records |
| 42 | +sqlite> select count(*) from logs; |
| 43 | +185926 |
| 44 | +``` |
0 commit comments