diff --git a/.flake8 b/.flake8 index 939bf85..2bcd70e 100644 --- a/.flake8 +++ b/.flake8 @@ -1,2 +1,2 @@ [flake8] -max-line-length = 300 \ No newline at end of file +max-line-length = 88 diff --git a/crim.py b/crim.py index 60b11f8..3f6aeba 100644 --- a/crim.py +++ b/crim.py @@ -1,33 +1,112 @@ import re -re_date = "(?i)(?:[0-3]?\d(?:st|nd|rd|th)?\s+(?:of\s+)?(?:jan\.?|january|feb\.?|february|mar\.?|march|apr\.?|april|may|jun\.?|june|jul\.?|july|aug\.?|august|sep\.?|september|oct\.?|october|nov\.?|november|dec\.?|december)|(?:jan\.?|january|feb\.?|february|mar\.?|march|apr\.?|april|may|jun\.?|june|jul\.?|july|aug\.?|august|sep\.?|september|oct\.?|october|nov\.?|november|dec\.?|december)\s+[0-3]?\d(?:st|nd|rd|th)?)(?:\,)?\s*(?:\d{4})?|[0-3]?\d[-\./][0-3]?\d[-\./]\d{2,4}" -re_time = '(?i)\d{1,2}:\d{2} ?(?:[ap]\.?m\.?)?|\d[ap]\.?m\.?' -re_phone = '''((?:(?]+[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019])?)' -re_email = r"(?i)([A-Za-z0-9!#$%&'*+\/=?^_{|.}~-]+@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)" -re_ipv4 = '(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)' -re_ipv6 = '\s*(?!.*::.*::)(?:(?!:)|:(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?]+[^\s`!()\[\]{};:'\".,<>?\xab\xbb\u201c\u201d\u2018\u2019])?)" +) +re_email = ( + r"(?i)([A-Za-z0-9!#$%&'*+\/=?^_{|.}~-]+@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+" + r"[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)" +) +re_ipv4 = ( + r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]" + r"[0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|" + r"[01]?[0-9][0-9]?)" +) +re_ipv6 = ( + r"\s*(?!.*::.*::)(?:(?!:)|:(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(? list: """Function to match using regex findall function Args: @@ -73,6 +152,7 @@ def match(text: str, regex: str) -> list: parsed = list(re.findall(regex, text)) return parsed + def match_by_regex_search(text: str, regex: str) -> list: """Function to match using regex search function Args: @@ -81,7 +161,7 @@ def match_by_regex_search(text: str, regex: str) -> list: Returns: list (list): list of sensitive data found in lines """ - parsed=[] + parsed = [] for line in text.split(): if re.search(regex, line): pattern_string = re.search(regex, line) @@ -89,86 +169,114 @@ def match_by_regex_search(text: str, regex: str) -> list: parsed.append(sensitive_string) return parsed + def dates(text: str) -> list: return match(text, regex_map["dates"]) + def times(text: str) -> list: return match(text, regex_map["times"]) + def phones(text: str) -> list: return match(text, regex_map["phones"]) + def phones_with_exts(text: str) -> list: return match(text, regex_map["phones_with_exts"]) -def emails(text:str) -> list: + +def emails(text: str) -> list: return match(text, regex_map["emails"]) + def links(text: str) -> list: return match_by_regex_search(text, regex_map["links"]) + def ipv4s(text: str) -> list: return match(text, regex_map["ipv4"]) + def ipv6s(text: str) -> list: return match(text, regex_map["ipv6"]) + def ips(text: str) -> list: return match(text, regex_map["ips"]) + def not_known_ports(text: str) -> list: return match(text, regex_map["not_known_ports"]) + def prices(text: str) -> list: return match(text, regex_map["prices"]) + def hex_colors(text: str) -> list: return match(text, regex_map["hex_colors"]) + def credit_cards(text: str) -> list: return match(text, regex_map["credit_cards"]) + def visa_cards(text: str) -> list: return match(text, regex_map["visa_cards"]) + def master_cards(text: str) -> list: return match(text, regex_map["master_cards"]) + def btc_address(text: str) -> list: return match(text, regex_map["btc_addresses"]) + def street_addresses(text: str) -> list: return match(text, regex_map["street_addresses"]) + def zip_codes(text: str) -> list: return match(text, regex_map["zip_codes"]) + def po_boxes(text: str) -> list: return match(text, regex_map["po_boxes"]) + def ssn_numbers(text: str) -> list: return match(text, regex_map["ssn_number"]) + def md5_hashes(text: str) -> list: return match(text, regex_map["md5_hashes"]) + def sha1_hashes(text: str) -> list: return match(text, regex_map["sha1_hashes"]) + def sha256_hashes(text: str) -> list: return match(text, regex_map["sha256_hashes"]) + def isbn13s(text: str) -> list: return match(text, regex_map["isbn13"]) + def isbn10s(text: str) -> list: return match(text, regex_map["isbn10"]) + def mac_addresses(text: str) -> list: return match_by_regex_search(text, regex_map["mac_addresses"]) + def iban_numbers(text: str) -> list: return match(text, regex_map["iban_numbers"]) + def git_repos(text: str) -> list: - return match_by_regex_search(text, regex_map["git_repos"]) \ No newline at end of file + return match_by_regex_search(text, regex_map["git_repos"]) diff --git a/test_crim.py b/test_crim.py index c13ea83..b0fc7c9 100644 --- a/test_crim.py +++ b/test_crim.py @@ -1,59 +1,81 @@ import crim as cregex + def test_cregex_dates(): - test_data = ["3-23-17", - "3.23.17", - "03.23.17", - "March 23th, 2017", - "Mar 23th 2017", - "Mar. 23th, 2017", - "23 Mar 2017",] + test_data = [ + "3-23-17", + "3.23.17", + "03.23.17", + "March 23th, 2017", + "Mar 23th 2017", + "Mar. 23th, 2017", + "23 Mar 2017", + ] for test_string in test_data: - assert cregex.dates(test_string) == [test_string], "Dates regex failed on: " + test_string + assert cregex.dates(test_string) == [test_string], ( + "Dates regex failed on: " + test_string + ) + def test_cregex_times(): - test_data = ["09:45", - "9:45", - "23:45", - "9:00am", - "9am", - "9:00 A.M.", - "9:00 pm",] + test_data = [ + "09:45", + "9:45", + "23:45", + "9:00am", + "9am", + "9:00 A.M.", + "9:00 pm", + ] for test_string in test_data: - assert cregex.times(test_string) == [test_string], "Times regex failed on: " + test_string + assert cregex.times(test_string) == [test_string], ( + "Times regex failed on: " + test_string + ) + def test_cregex_phones(): - test_data = ["12345678900", - "1234567890", - "+1 234 567 8900", - "234-567-8900", - "1-234-567-8900", - "1.234.567.8900", - "5678900", - "567-8900", - "(003) 555-1212", - "+41 22 730 5989", - "+442345678900"] + test_data = [ + "12345678900", + "1234567890", + "+1 234 567 8900", + "234-567-8900", + "1-234-567-8900", + "1.234.567.8900", + "5678900", + "567-8900", + "(003) 555-1212", + "+41 22 730 5989", + "+442345678900", + ] for test_string in test_data: - assert cregex.phones(test_string) == [test_string], "Phones regex failed on: " + test_string + assert cregex.phones(test_string) == [test_string], ( + "Phones regex failed on: " + test_string + ) + def test_cregex_phones_with_exts(): - test_data = ["(523)222-8888 ext 527", - "(523)222-8888x623", - "(523)222-8888 x623", - "(523)222-8888 x 623", - "(523)222-8888EXT623", - "523-222-8888EXT623", - "(523) 222-8888 x 623",] + test_data = [ + "(523)222-8888 ext 527", + "(523)222-8888x623", + "(523)222-8888 x623", + "(523)222-8888 x 623", + "(523)222-8888EXT623", + "523-222-8888EXT623", + "(523) 222-8888 x 623", + ] for test_string in test_data: - assert cregex.phones_with_exts(test_string) == [test_string], "Phones with exts regex failed on: " + test_string + assert cregex.phones_with_exts(test_string) == [test_string], ( + "Phones with exts regex failed on: " + test_string + ) + def test_cregex_links(): - test_data = ["http://www.google.com", + test_data = [ + "http://www.google.com", "https://www.google.com", "www.google.com", "http://www.google.com/search?q=python", @@ -73,103 +95,128 @@ def test_cregex_links(): "www.google.com/search?q=python&hl=en&tbm=nws&tbs=qdr:d&tbs=qdr:w", "http://www.google.com/search?q=python&hl=en&t", "www.google.com", - "http://www.google.com", - "www.google.com/?query=dog", - "sub.example.com", - "http://www.google.com/%&#/?q=dog", - "google.com",] - + "http://www.google.com", + "www.google.com/?query=dog", + "sub.example.com", + "http://www.google.com/%&#/?q=dog", + "google.com", + ] + for test_string in test_data: - assert cregex.links(test_string) == [test_string], "Links regex failed on: " + test_string + assert cregex.links(test_string) == [test_string], ( + "Links regex failed on: " + test_string + ) + def test_cregex_emails(): - test_data = ["john.smith@gmail.com", - "john_smith@gmail.com", - "john@example.net", - "John@example.net", - "jane@example.gov.us"] + test_data = [ + "john.smith@gmail.com", + "john_smith@gmail.com", + "john@example.net", + "John@example.net", + "jane@example.gov.us", + ] failing_tests = ["john.smith@gmail..com"] for test_string in test_data: - assert cregex.emails(test_string) == [test_string], "Emails regex failed on: " + test_string + assert cregex.emails(test_string) == [test_string], ( + "Emails regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.emails(test_string) != [test_string], "These should not be matched " + test_string + assert cregex.emails(test_string) != [test_string], ( + "These should not be matched " + test_string + ) + def test_cregex_ipv4s(): - test_data = ["127.0.0.1", - "192.168.1.1", - "8.8.8.8", - "192.30.253.113", - "216.58.194.46"] - + test_data = [ + "127.0.0.1", + "192.168.1.1", + "8.8.8.8", + "192.30.253.113", + "216.58.194.46", + ] + for test_string in test_data: - assert cregex.ipv4s(test_string) == [test_string], "IPv4s regex failed on: " + test_string + assert cregex.ipv4s(test_string) == [test_string], ( + "IPv4s regex failed on: " + test_string + ) + def test_cregex_ipv6s(): - test_data = ["fe80:0000:0000:0000:0204:61ff:fe9d:f156", - "fe80:0:0:0:204:61ff:fe9d:f156", - "fe80::204:61ff:fe9d:f156", - "fe80:0000:0000:0000:0204:61ff:254.157.241.86", - "fe80:0:0:0:0204:61ff:254.157.241.86", - "::1"] + test_data = [ + "fe80:0000:0000:0000:0204:61ff:fe9d:f156", + "fe80:0:0:0:204:61ff:fe9d:f156", + "fe80::204:61ff:fe9d:f156", + "fe80:0000:0000:0000:0204:61ff:254.157.241.86", + "fe80:0:0:0:0204:61ff:254.157.241.86", + "::1", + ] for test_string in test_data: - assert cregex.ipv6s(test_string) == [test_string], "IPv6s regex failed on: " + test_string + assert cregex.ipv6s(test_string) == [test_string], ( + "IPv6s regex failed on: " + test_string + ) + def test_cregex_ips(): - test_data = ["127.0.0.1", - "192.168.1.1", - "8.8.8.8", - "192.30.253.113", - "216.58.194.46", - "fe80:0000:0000:0000:0204:61ff:fe9d:f156", - "fe80:0:0:0:204:61ff:fe9d:f156", - "fe80::204:61ff:fe9d:f156", - "fe80:0000:0000:0000:0204:61ff:254.157.241.86", - "fe80:0:0:0:0204:61ff:254.157.241.86", - "::1"] + test_data = [ + "127.0.0.1", + "192.168.1.1", + "8.8.8.8", + "192.30.253.113", + "216.58.194.46", + "fe80:0000:0000:0000:0204:61ff:fe9d:f156", + "fe80:0:0:0:204:61ff:fe9d:f156", + "fe80::204:61ff:fe9d:f156", + "fe80:0000:0000:0000:0204:61ff:254.157.241.86", + "fe80:0:0:0:0204:61ff:254.157.241.86", + "::1", + ] for test_string in test_data: - assert cregex.ips(test_string) == [test_string], "IPs regex failed on: " + test_string + assert cregex.ips(test_string) == [test_string], ( + "IPs regex failed on: " + test_string + ) + def test_cregex_not_ports(): - test_data = ["1024", - "2121", - "8080", - "12345", - "55555", - "65535"] - - failing_tests = ["21", - "80", - "1023", - "65536"] + test_data = ["1024", "2121", "8080", "12345", "55555", "65535"] + + failing_tests = ["21", "80", "1023", "65536"] for test_string in test_data: - assert cregex.not_known_ports(test_string) == [test_string], "Not ports regex failed on: " + test_string + assert cregex.not_known_ports(test_string) == [test_string], ( + "Not ports regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.not_known_ports(test_string) != [test_string], "This is a well known port " + test_string + assert cregex.not_known_ports(test_string) != [test_string], ( + "This is a well known port " + test_string + ) + def test_cregex_prices(): - test_data = ["$1.23", - "$1", - "$1,000", - "$10,000.00"] + test_data = ["$1.23", "$1", "$1,000", "$10,000.00"] - failing_tests = ["$1,10,0", - "$100.000"] + failing_tests = ["$1,10,0", "$100.000"] for test_string in test_data: - assert cregex.prices(test_string) == [test_string], "Prices regex failed on: " + test_string + assert cregex.prices(test_string) == [test_string], ( + "Prices regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.prices(test_string) != [test_string], "This is not a price " + test_string + assert cregex.prices(test_string) != [test_string], ( + "This is not a price " + test_string + ) + def test_cregex_hex_colors(): - test_data = ["#000000", + test_data = [ + "#000000", "#FFFFFF", "#FF0000", "#00FF00", @@ -178,244 +225,328 @@ def test_cregex_hex_colors(): "#FF00FF", "#00FFFF", "#000000FF", - "#FFFFFFFF"] + "#FFFFFFFF", + ] - failing_tests = ["#000000FFF", - "#FFFFFFFFF"] + failing_tests = ["#000000FFF", "#FFFFFFFFF"] for test_string in test_data: - assert cregex.hex_colors(test_string) == [test_string], "Hex colors regex failed on: " + test_string + assert cregex.hex_colors(test_string) == [test_string], ( + "Hex colors regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.hex_colors(test_string) != [test_string], "This is not a hex color " + test_string + assert cregex.hex_colors(test_string) != [test_string], ( + "This is not a hex color " + test_string + ) + def test_cregex_credit_cards(): - test_data = ["0000-0000-0000-0000", - "0123456789012345", - "0000 0000 0000 0000", - "012345678901234"] + test_data = [ + "0000-0000-0000-0000", + "0123456789012345", + "0000 0000 0000 0000", + "012345678901234", + ] for test_string in test_data: - assert cregex.credit_cards(test_string) == [test_string], "Credit cards regex failed on: " + test_string + assert cregex.credit_cards(test_string) == [test_string], ( + "Credit cards regex failed on: " + test_string + ) + def test_cregex_visa_cards(): - test_data=["4111 1111 1111 1111", - "4222 2222 2222 2222"] + test_data = ["4111 1111 1111 1111", "4222 2222 2222 2222"] - failing_tests = ["5500 0000 0000 0004", - "3400 0000 0000 009", - "3000 0000 0000 04"] + failing_tests = ["5500 0000 0000 0004", "3400 0000 0000 009", "3000 0000 0000 04"] for test_string in test_data: - assert cregex.visa_cards(test_string) == [test_string], "Visa cards regex failed on: " + test_string + assert cregex.visa_cards(test_string) == [test_string], ( + "Visa cards regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.visa_cards(test_string) != [test_string], "This is not a visa card " + test_string + assert cregex.visa_cards(test_string) != [test_string], ( + "This is not a visa card " + test_string + ) def test_cregex_master_cards(): - test_data=["5500 0000 0000 0004", - "5500 3334 0000 1234"] + test_data = ["5500 0000 0000 0004", "5500 3334 0000 1234"] - failing_tests = ["4111 1111 1111 1111", - "4222 2222 2222 2222", - "3400 0000 0000 009", - "3000 0000 0000 04"] + failing_tests = [ + "4111 1111 1111 1111", + "4222 2222 2222 2222", + "3400 0000 0000 009", + "3000 0000 0000 04", + ] for test_string in test_data: - assert cregex.master_cards(test_string) == [test_string], "Master cards regex failed on: " + test_string + assert cregex.master_cards(test_string) == [test_string], ( + "Master cards regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.master_cards(test_string) != [test_string], "This is not a master card " + test_string + assert cregex.master_cards(test_string) != [test_string], ( + "This is not a master card " + test_string + ) -def test_cregex_btc_address(): - test_data = ["1LgqButDNV2rVHe9DATt6WqD8tKZEKvaK2", - "19P6EYhu6kZzRy9Au4wRRZVE8RemrxPbZP", - "1bones8KbQge9euDn523z5wVhwkTP3uc1", - "1Bow5EMqtDGV5n5xZVgdpRPJiiDK6XSjiC"] - failing_tests = ["2LgqButDNV2rVHe9DATt6WqD8tKZEKvaK2", - "19Ry9Au4wRRZVE8RemrxPbZP", - "1bones8KbQge9euDn523z5wVhwkTP3uc12939", - "1Bow5EMqtDGV5n5xZVgdpR"] +def test_cregex_btc_address(): + test_data = [ + "1LgqButDNV2rVHe9DATt6WqD8tKZEKvaK2", + "19P6EYhu6kZzRy9Au4wRRZVE8RemrxPbZP", + "1bones8KbQge9euDn523z5wVhwkTP3uc1", + "1Bow5EMqtDGV5n5xZVgdpRPJiiDK6XSjiC", + ] + + failing_tests = [ + "2LgqButDNV2rVHe9DATt6WqD8tKZEKvaK2", + "19Ry9Au4wRRZVE8RemrxPbZP", + "1bones8KbQge9euDn523z5wVhwkTP3uc12939", + "1Bow5EMqtDGV5n5xZVgdpR", + ] for test_string in test_data: - assert cregex.btc_address(test_string) == [test_string], "BTC address regex failed on: " + test_string + assert cregex.btc_address(test_string) == [test_string], ( + "BTC address regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.btc_address(test_string) != [test_string], "This is not a BTC address " + test_string + assert cregex.btc_address(test_string) != [test_string], ( + "This is not a BTC address " + test_string + ) + def test_cregex_street_addresses(): - test_data = ["101 main st.", - "504 parkwood drive", - "3 elm boulevard", - "500 elm street "] + test_data = [ + "101 main st.", + "504 parkwood drive", + "3 elm boulevard", + "500 elm street ", + ] failing_tests = ["101 main straight"] for test_string in test_data: - assert cregex.street_addresses(test_string) == [test_string], "Street addresses regex failed on: " + test_string + assert cregex.street_addresses(test_string) == [test_string], ( + "Street addresses regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.street_addresses(test_string) != [test_string], "This is not a street address " + test_string + assert cregex.street_addresses(test_string) != [test_string], ( + "This is not a street address " + test_string + ) + def test_cregex_zip_codes(): - test_data = ["02540", - "02540-4119"] + test_data = ["02540", "02540-4119"] - failing_tests = ["10001-1234-5678-9012-3456-7890-1234", + failing_tests = [ + "10001-1234-5678-9012-3456-7890-1234", "101 main straight", - "123456"] + "123456", + ] for test_string in test_data: - assert cregex.zip_codes(test_string) == [test_string], "Zip codes regex failed on: " + test_string + assert cregex.zip_codes(test_string) == [test_string], ( + "Zip codes regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.zip_codes(test_string) != [test_string], "This is not a zip code " + test_string + assert cregex.zip_codes(test_string) != [test_string], ( + "This is not a zip code " + test_string + ) + def test_cregex_po_boxes(): - test_data = ["PO Box 123456", - "p.o. box 234234"] + test_data = ["PO Box 123456", "p.o. box 234234"] failing_tests = ["PO Box 1234-5678-9012-3456-7890-1234"] for test_string in test_data: - assert cregex.po_boxes(test_string) == [test_string], "PO boxes regex failed on: " + test_string + assert cregex.po_boxes(test_string) == [test_string], ( + "PO boxes regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.po_boxes(test_string) != [test_string], "This is not a PO box " + test_string + assert cregex.po_boxes(test_string) != [test_string], ( + "This is not a PO box " + test_string + ) + def test_cregex_ssns(): - test_data = ["000-00-0000", - "111-11-1111", - "222-22-2222", - "123-45-6789"] + test_data = ["000-00-0000", "111-11-1111", "222-22-2222", "123-45-6789"] - failing_tests = ["123-45-6789-1234", + failing_tests = [ + "123-45-6789-1234", "1234567891234", "123-45-6789-1234", - "1234567891234"] + "1234567891234", + ] for test_string in test_data: - assert cregex.ssn_numbers(test_string) == [test_string], "SSNs regex failed on: " + test_string + assert cregex.ssn_numbers(test_string) == [test_string], ( + "SSNs regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.ssn_numbers(test_string) != [test_string], "This is not an SSN " + test_string + assert cregex.ssn_numbers(test_string) != [test_string], ( + "This is not an SSN " + test_string + ) -def test_cregex_md5_hashes(): - test_data = ["b5ab01fad5a008d436f76aafc896f9c6", - "00000000000000000000000000000000", - "fffFFFfFFfFFFfFFFFfFfFfffffFfFFF"] - failing_tests = ["0cc175b9c0f1b6a831c399e2697723-1234", +def test_cregex_md5_hashes(): + test_data = [ + "b5ab01fad5a008d436f76aafc896f9c6", + "00000000000000000000000000000000", + "fffFFFfFFfFFFfFFFFfFfFfffffFfFFF", + ] + + failing_tests = [ + "0cc175b9c0f1b6a831c399e2697723-1234", "d41d8cd98f00b204e9800998ecf8427e-1234", "900150983cd24fb0d6963f7d28e17f72-1234", "f96b697d7cb9dd08c81209bcf0aaf94f-1234", "b5ab01fad5a008d436f76aafc896f9c600000000", - "", - "7TS5x1trQs652k4AZ3hJE83YCvJRy0U8", - "b5ab01fad5a008-436f76aafc896f9c6"] + "", + "7TS5x1trQs652k4AZ3hJE83YCvJRy0U8", + "b5ab01fad5a008-436f76aafc896f9c6", + ] for test_string in test_data: - assert cregex.md5_hashes(test_string) == [test_string], "MD5 hashes regex failed on: " + test_string + assert cregex.md5_hashes(test_string) == [test_string], ( + "MD5 hashes regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.md5_hashes(test_string) != [test_string], "This is not an MD5 hash " + test_string + assert cregex.md5_hashes(test_string) != [test_string], ( + "This is not an MD5 hash " + test_string + ) + def test_cregex_sha1_hashes(): - test_data = ["da39a3ee5e6b4b0d3255bfef95601890afd80709", + test_data = [ + "da39a3ee5e6b4b0d3255bfef95601890afd80709", "0000000000000000000000000000000000000000", "ffffffffffffffffffffffffffffffffffffffff", "b5ab01fad5a008d436f76aafc896f9c6abcd1234", - "0000000000000000000000000000000000000000", - "fffFFFfFFfFFFfFFFFfFfFfffffFfFFFffffFFFF"] + "0000000000000000000000000000000000000000", + "fffFFFfFFfFFFfFFFFfFfFfffffFfFFFffffFFFF", + ] - failing_tests = ["0cc175b9c0f1b6a831c399e2697723-1234", + failing_tests = [ + "0cc175b9c0f1b6a831c399e2697723-1234", "d41d8cd98f00b204e9800998ecf8427e-1234", "900150983cd24fb0d6963f7d28e17f72-1234", "f96b697d7cb9dd08c81209bcf0aaf94f-1234", "b5ab01fad5a008d436f76aafc896f9c600000000202020202020202020202020", - "", - "7TS5x1trQs652k4AZ3hJE83YCvJRy0U85x1trQs652k4AZ3hJE83YCvJRy0U8asd", - "b5ab01fad5a008-436f76aafc896f9c6-436f76aafc896f9c6-436f76aafc896" - ] + "", + "7TS5x1trQs652k4AZ3hJE83YCvJRy0U85x1trQs652k4AZ3hJE83YCvJRy0U8asd", + "b5ab01fad5a008-436f76aafc896f9c6-436f76aafc896f9c6-436f76aafc896", + ] for test_string in test_data: - assert cregex.sha1_hashes(test_string) == [test_string], "SHA1 hashes regex failed on: " + test_string + assert cregex.sha1_hashes(test_string) == [test_string], ( + "SHA1 hashes regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.sha1_hashes(test_string) != [test_string], "This is not an SHA1 hash " + test_string + assert cregex.sha1_hashes(test_string) != [test_string], ( + "This is not an SHA1 hash " + test_string + ) -def test_cregex_sha256_hashes(): - test_data = ["3f4146a1d0b5dac26562ff7dc6248573f4e996cf764a0f517318ff398dcfa792", - "0000000000000000000000000000000000000000000000000000000000000000", - "fffFFFfFFfFFFfFFFFfFfFfffffFfFFFffffFFFFfffffFFFFFffFFffFFffFFff"] - failing_tests = ["3f4146a1d0b5dac26562ff7dc6248573f4e996cf764a0f517318ff398dcfa7920", - "", - "e9iLS075z9HAJlUWg2ZpK5hRxjLeSpIqMKJO67c739VYf7Bj7eR1WjOO82IHcXVd", - "b5ab01fad5a008-436f76aafc896f9c6-436f76aafc896f9c6-436f76aafc896" - ] +def test_cregex_sha256_hashes(): + test_data = [ + "3f4146a1d0b5dac26562ff7dc6248573f4e996cf764a0f517318ff398dcfa792", + "0000000000000000000000000000000000000000000000000000000000000000", + "fffFFFfFFfFFFfFFFFfFfFfffffFfFFFffffFFFFfffffFFFFFffFFffFFffFFff", + ] + + failing_tests = [ + "3f4146a1d0b5dac26562ff7dc6248573f4e996cf764a0f517318ff398dcfa7920", + "", + "e9iLS075z9HAJlUWg2ZpK5hRxjLeSpIqMKJO67c739VYf7Bj7eR1WjOO82IHcXVd", + "b5ab01fad5a008-436f76aafc896f9c6-436f76aafc896f9c6-436f76aafc896", + ] for test_string in test_data: - assert cregex.sha256_hashes(test_string) == [test_string], "SHA256 hashes regex failed on: " + test_string + assert cregex.sha256_hashes(test_string) == [test_string], ( + "SHA256 hashes regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.sha256_hashes(test_string) != [test_string], "This is not an SHA256 hash " + test_string + assert cregex.sha256_hashes(test_string) != [test_string], ( + "This is not an SHA256 hash " + test_string + ) + def test_cregex_isbn13s(): - test_data = ["978-3-16-148410-0", - "978-1-56619-909-4", - "133-1-12144-909-9"] + test_data = ["978-3-16-148410-0", "978-1-56619-909-4", "133-1-12144-909-9"] - failing_tests = ["1-56619-909-3", - "1-33342-100-1", - "2-33342-362-9"] + failing_tests = ["1-56619-909-3", "1-33342-100-1", "2-33342-362-9"] for test_string in test_data: - assert cregex.isbn13s(test_string) == [test_string], "ISBN13s regex failed on: " + test_string + assert cregex.isbn13s(test_string) == [test_string], ( + "ISBN13s regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.isbn13s(test_string) != [test_string], "This is not an ISBN13 " + test_string + assert cregex.isbn13s(test_string) != [test_string], ( + "This is not an ISBN13 " + test_string + ) + def test_cregex_isbn10s(): - test_data = ["3-16-148410-0", - "1-56619-909-4", - "1-33342-100-1"] + test_data = ["3-16-148410-0", "1-56619-909-4", "1-33342-100-1"] - failing_tests = ["978-3-16-148410-0", - "978-1-56619-909-4", - "133-1-12144-909-9"] + failing_tests = ["978-3-16-148410-0", "978-1-56619-909-4", "133-1-12144-909-9"] for test_string in test_data: - assert cregex.isbn10s(test_string) == [test_string], "ISBN10s regex failed on: " + test_string + assert cregex.isbn10s(test_string) == [test_string], ( + "ISBN10s regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.isbn10s(test_string) != [test_string], "This is not an ISBN10 " + test_string + assert cregex.isbn10s(test_string) != [test_string], ( + "This is not an ISBN10 " + test_string + ) + def test_cregex_mac_addresses(): - test_data = ["f8:2f:a4:fe:76:d2", - "F8:2F:A4:FE:76:D2", - "3D-F2-C9-A6-B3-4F"] + test_data = ["f8:2f:a4:fe:76:d2", "F8:2F:A4:FE:76:D2", "3D-F2-C9-A6-B3-4F"] - failing_tests = ["3D:F2:C9:A6:B3:4G", - "f0:2f:P4:Be:96:J5"] + failing_tests = ["3D:F2:C9:A6:B3:4G", "f0:2f:P4:Be:96:J5"] for test_string in test_data: - assert cregex.mac_addresses(test_string) == [test_string], "MAC addresses regex failed on: " + test_string + assert cregex.mac_addresses(test_string) == [test_string], ( + "MAC addresses regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.mac_addresses(test_string) != [test_string], "This is not an MAC address " + test_string + assert cregex.mac_addresses(test_string) != [test_string], ( + "This is not an MAC address " + test_string + ) + def test_cregex_git_repos(): - test_data = ["https://github.com/brootware/commonregex-improved.git", - "git@github.com:brootware/commonregex-improved.git"] + test_data = [ + "https://github.com/brootware/commonregex-improved.git", + "git@github.com:brootware/commonregex-improved.git", + ] - failing_tests = ["https://github.com/brootware/commonregex-improved", - "test@github.com:brootware/commonregex-improved.git"] + failing_tests = [ + "https://github.com/brootware/commonregex-improved", + "test@github.com:brootware/commonregex-improved.git", + ] for test_string in test_data: - assert cregex.git_repos(test_string) == [test_string], "Git repos regex failed on: " + test_string + assert cregex.git_repos(test_string) == [test_string], ( + "Git repos regex failed on: " + test_string + ) for test_string in failing_tests: - assert cregex.git_repos(test_string) != [test_string], "This is not a Git repo " + test_string \ No newline at end of file + assert cregex.git_repos(test_string) != [test_string], ( + "This is not a Git repo " + test_string + )