Ever wanted to see everything the Wayback Machine from the Internet Archive has on a domain? Here you go:

Just give it the domain you want it to search for and, if you DO want it to include subdomains, add true at the end.

python3 waybackurls.py example.com true <—with subdomains

python3 waybackurls.py example.com <—without subdomains

It’ll save the results in a nice little JSON file in the current working directory.

import requests
import sys
import json


def waybackurls(host, with_subs, timeout=120):
    """Fetch the URLs the Wayback Machine CDX index has recorded for *host*.

    Args:
        host: Domain (or URL prefix) to look up, e.g. ``example.com``.
        with_subs: When truthy, query ``*.<host>/*`` so that subdomains are
            included; otherwise query ``<host>/*`` only.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection cannot hang the script forever. ``None``
            disables the timeout.

    Returns:
        The decoded JSON rows with the first row removed (the CDX JSON
        output starts with a header row naming the requested fields).
        Because the query asks for ``fl=original``, each remaining row is
        expected to be a one-element list holding the original URL.
        An empty list is returned when the server sends back no rows.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    if with_subs:
        url = 'http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey' % host
    else:
        url = 'http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original&collapse=urlkey' % host
    r = requests.get(url, timeout=timeout)
    # Surface HTTP errors directly instead of failing later while trying to
    # parse an HTML error page as JSON.
    r.raise_for_status()
    # A blank body means "no captures"; r.json() would raise on it.
    if not r.content.strip():
        return []
    results = r.json()
    return results[1:]


if __name__ == '__main__':
    # Command-line entry point:
    #   python3 waybackurls.py <url> [include_subdomains]
    argc = len(sys.argv)
    if argc < 2:
        print('Usage:\n\tpython3 waybackurls.py <url> <include_subdomains:optional>')
        sys.exit()

    host = sys.argv[1]
    # sys.argv[0] is the script name, so an optional second argument makes
    # argc == 3. The presence of any second argument enables subdomains.
    # (The previous check, argc > 3, could never be true for the documented
    # usage, so subdomains were silently never included.)
    with_subs = argc > 2

    urls = waybackurls(host, with_subs)
    if urls:
        filename = '%s-waybackurls.json' % host
        # Serialize only when there is something to save.
        with open(filename, 'w') as f:
            f.write(json.dumps(urls))
        print('[*] Saved results to %s' % filename)
    else:
        print('[-] Found nothing')