OSINT for Web Endpoint Extraction
1. Wayback Machine (web.archive.org)
echo "target.com" | xargs -I {} curl -s "https://web.archive.org/cdx/search/cdx?url={}/*&output=text&fl=original&collapse=urlkey"
2. URLScan.io
echo "target.com" | xargs -I {} curl -s "https://urlscan.io/api/v1/search/?q=domain:{}&size=100" | jq -r '.results[].page.url' | sort -V | uniq
3. AlienVault Open Threat Exchange (otx.alienvault.com)
echo "target.com" | xargs -I {} curl -s "https://otx.alienvault.com/api/v1/indicators/domain/{}/url_list?limit=500&page=1" | jq -r '.url_list[].url'
4. CommonCrawl.org
target="target.com"
for commoncrawl in $(curl -s "http://index.commoncrawl.org/collinfo.json" | jq -r '.[]."cdx-api"')
do
curl -s "${commoncrawl}?url=*.${target}&output=json" | jq -r .url
done