#!/bin/bash
#
# Recursively trace the outbound links of a web site.
#
# Starting from the host given as $1, every page is fetched with curl, the
# host names of its "http" links are extracted, and each not-yet-visited host
# is crawled in turn. Progress is printed as an indented ">>> host" /
# "<<< host" trace, and a directory per host is created inside a fresh
# temporary directory so that the link graph can be rendered with tree(1).
#
# Usage:
#   [MAX_DEPTH=n] <this-script> <host>
#
# Environment:
#   MAX_DEPTH  maximum nesting level that is still expanded (default 50)
#
# Output:
#   trace on stdout; "tree.txt" written next to this script.
#   The temporary working directory is reported and left in place.
#
# Requires: curl, tree

if [[ $# -lt 1 || -z $1 ]]; then
  printf 'Usage: %s <host>\n' "${0##*/}" >&2
  exit 2
fi

# Resolve the script directory BEFORE changing directory: a relative $0
# (e.g. ./script.sh) would otherwise be resolved against the temp dir later.
script_dir=$(cd -- "$(dirname -- "$0")" && pwd) || exit 1

TABS=0
GLOBAL_IGNORE='wordpress|facebook|baidu|google'
MAX_DEPTH=${MAX_DEPTH:-50}

temp=$(mktemp -d) || {
  echo "cannot create temporary directory" >&2
  exit 1
}
cd -- "$temp" || exit 1

#######################################
# Print the sorted, unique host names linked from a page.
# Only the first href on each input line is considered, and only targets
# containing "http" are kept.
# Arguments: $1 - host or URL to fetch
# Outputs:   one host name per line on stdout
#######################################
function getLinksByURL {
  # --url keeps a scraped host that starts with "-" from being parsed as a
  # curl option.
  curl --max-time 10 -sL --url "$1" \
    | awk -F'href=' '{print $2}' \
    | awk -F'>| ' '{print $1}' \
    | grep 'http' \
    | awk -F"/" '{print $3}' \
    | tr -d "\"'" \
    | sort -u
}

#######################################
# Print the indentation for a nesting level (one space per level).
# Arguments: $1 - nesting level
# Outputs:   $1 spaces on stdout, no trailing newline
#######################################
function showTabs {
  printf '%*s' "${1:-0}" ''
}

#######################################
# Crawl one site and, recursively, every new host it links to.
# Globals:   TABS (nesting level, incremented for the duration of the call),
#            MAX_DEPTH, GLOBAL_IGNORE, temp (read)
# Arguments: $1 - host to crawl
# Outputs:   ">>> host" / "<<< host" trace lines on stdout
# Side effects: creates a directory named after the host in the current
#            directory; appends the host to "$temp/DONE" (visited list,
#            one host per line).
#######################################
function showSiteLinkOut {
  local site=$1
  local start_dir=$PWD
  local out

  TABS=$((TABS + 1))

  # Mark the site as visited up front so that it is recorded exactly once,
  # even when the page turns out to have no links.
  printf '%s\n' "$site" >> "$temp/DONE"

  mkdir -- "$site" 2> /dev/null # may already exist
  cd -- "$site" 2> /dev/null \
    || printf 'warning: cannot enter directory for %s\n' "$site" >&2

  # Nothing below this level is expanded, so do not even fetch the page.
  if ((TABS < MAX_DEPTH)); then
    # Hosts are matched as fixed strings (-F): a host is skipped when it
    # contains an already-visited host name, and "." is not a wildcard.
    getLinksByURL "$site" \
      | grep -vF -f "$temp/DONE" \
      | while IFS= read -r out; do
        # Unusable as a host / directory name.
        [[ -z $out || $out == . || $out == .. ]] && continue
        # The visited list may have grown while siblings were crawled.
        grep -qF -f "$temp/DONE" <<< "$out" && continue
        [[ $out =~ $GLOBAL_IGNORE ]] && continue

        echo "[$TABS]$(showTabs "$TABS")>>> $out"
        showSiteLinkOut "$out"
        echo
        echo "[$TABS]$(showTabs "$TABS")<<< $out"
      done
  fi

  # Leave the caller's nesting level and working directory as they were,
  # regardless of whether entering the site directory succeeded.
  TABS=$((TABS - 1))
  cd -- "$start_dir" || return
}

echo "working at $temp"
echo ">>> TRACE to $1 START"
: > "$temp/DONE"
showSiteLinkOut "$1"
cd -- "$temp" || exit 1
tree -d "$1" > "$script_dir/tree.txt"