#!/bin/bash
#
# Crawls the outgoing links of a site recursively and mirrors the link graph
# as nested directories inside a fresh temporary directory.
#
# Environment:
#   MAX_DEPTH  maximum recursion depth (defaults to 50 when unset or empty)

# Current recursion depth; updated by showSiteLinkOut.
TABS=0

# All crawl state (directory tree and the DONE list) lives under $temp.
# Abort if it cannot be created or entered, otherwise the crawl would create
# directories in whatever the current directory happens to be.
temp=$(mktemp -d) || {
  echo "error: mktemp -d failed" >&2
  exit 1
}
cd "$temp" || {
  echo "error: cannot cd to $temp" >&2
  exit 1
}

# Extended regex; any host matching it is never followed.
GLOBAL_IGNORE='wordpress|twitter|facebook|baidu|google'

: "${MAX_DEPTH:=50}"
#######################################
# Print the distinct hosts a page links to via absolute http(s) hrefs.
# Arguments:
#   $1 - URL (or bare host) handed to curl
# Outputs:
#   One host per line on stdout, sorted and de-duplicated. Nothing is
#   printed when the fetch fails or the page has no absolute links.
#######################################
function getLinksByURL {
  # grep -o emits every match on its own line, so several links on a single
  # line of HTML (e.g. minified pages) are all picked up. The host stops at
  # the first '/', '?', '#', quote, whitespace or '>'.
  curl --max-time 10 -sL "$1" \
    | grep -oE "href=[\"']?https?://[^/?#\"'[:space:]>]+" \
    | awk -F'/' '{print $3}' \
    | sort -u
}
#######################################
# Print a run of spaces with no trailing newline.
# Arguments:
#   $1 - number of spaces; missing, empty or non-numeric values print nothing
# Outputs:
#   $1 space characters on stdout.
#######################################
function showTabs {
  local count=${1:-0}

  # Reject anything that is not a plain non-negative integer.
  [[ $count =~ ^[0-9]+$ ]] || return 0

  # 10# forces base 10 so values such as "08" are not parsed as octal.
  printf '%*s' "$((10#$count))" ''
}
#######################################
# Tell whether a host counts as already visited.
# Globals:
#   temp (read) - $temp/DONE holds the visited hosts separated by '|'
# Arguments:
#   $1 - host to look up
# Returns:
#   0 if any non-empty DONE entry occurs in $1 as a literal substring,
#   1 otherwise.
#######################################
function _siteLinkSeen {
  local host=$1 entry
  local -a visited=()

  # DONE is written without a trailing newline, so read reports EOF even
  # though the array is filled; its status is deliberately ignored.
  IFS='|' read -r -a visited < "$temp/DONE"

  for entry in "${visited[@]}"; do
    [[ -n $entry && $host == *"$entry"* ]] && return 0
  done
  return 1
}

#######################################
# Recursively follow the outgoing links of a site, creating one directory per
# visited site nested under its referrer, and print a trace line per link
# that is followed.
# Globals:
#   TABS          (read/write) current recursion depth
#   MAX_DEPTH     (read) links are only followed while TABS < MAX_DEPTH
#   GLOBAL_IGNORE (read) extended regex; matching hosts are skipped
#   temp          (read) crawl root; $temp/DONE is the visited list
# Arguments:
#   $1 - site to crawl
# Outputs:
#   "[depth - path] >>> host" lines on stdout.
# Returns:
#   0 after entering the site directory; the caller is then expected to
#   decrement TABS and "cd .." to undo the descent.
#   1 if $1 is empty or its directory could not be entered; TABS and the
#   working directory are unchanged in that case.
#######################################
function showSiteLinkOut {
  local site=$1 out rel

  [[ -n $site ]] || return 1

  TABS=$((TABS + 1))
  # -p: an already existing directory is not an error.
  mkdir -p -- "$site" 2> /dev/null
  if ! cd -- "$site"; then
    # Without this the crawl would continue in the parent directory and the
    # caller's "cd .." would walk out of the tree.
    TABS=$((TABS - 1))
    return 1
  fi

  # Record the site once, up front, so that pages without outgoing links are
  # remembered too and are not crawled again from a sibling.
  printf '|%s' "$site" >> "$temp/DONE"

  # At maximum depth nothing would be followed; skip the fetch entirely.
  (( TABS < MAX_DEPTH )) || return 0

  # The loop runs in a pipeline subshell: TABS and directory changes made
  # inside it never leak back into this function's own shell.
  getLinksByURL "$site" | while read -r out; do
    [[ -n $out && $out != "$site" ]] || continue
    _siteLinkSeen "$out" && continue
    grep -qE -- "$GLOBAL_IGNORE" <<< "$out" && continue

    rel=${PWD#"$temp"}
    printf '%s\n' "[$TABS - $rel] >>> $out"

    # Only undo the descent when the callee actually descended.
    if showSiteLinkOut "$out"; then
      TABS=$((TABS - 1))
      cd ..
    fi
  done
  return 0
}
# Entry point: crawl starting at the site given as $1 and save the resulting
# directory tree.
if [[ -z ${1:-} ]]; then
  printf 'usage: %s <site>\n' "${0##*/}" >&2
  exit 1
fi

echo "working at $temp"
echo ">>> TRACE to $1 START"

# Seed the visited list with the start site (no trailing newline: entries are
# appended to this file separated by '|').
printf '%s' "$1" > "$temp/DONE"
showSiteLinkOut "$1"

# The crawl leaves the shell inside the site's directory; go back to the root.
cd "$temp" || exit 1

# NOTE(review): the script changed directory to $temp earlier, so when $0 is
# a relative path, dirname "$0" resolves against $temp and not against the
# directory the script was started from.
tree -d "$1" > "$(dirname "$0")"/tree.txt
# Replies to Re: SiteLinkMap 
# {"html5":"htmlmixed","css":"css","javascript":"javascript","php":"php","python":"python","ruby":"ruby","lua":"text\/x-lua","bash":"text\/x-sh","go":"go","c":"text\/x-csrc","cpp":"text\/x-c++src","diff":"diff","latex":"stex","sql":"sql","xml":"xml","apl":"apl","asterisk":"asterisk","c_loadrunner":"text\/x-csrc","c_mac":"text\/x-csrc","coffeescript":"text\/x-coffeescript","csharp":"text\/x-csharp","d":"d","ecmascript":"javascript","erlang":"erlang","groovy":"text\/x-groovy","haskell":"text\/x-haskell","haxe":"text\/x-haxe","html4strict":"htmlmixed","java":"text\/x-java","java5":"text\/x-java","jquery":"javascript","mirc":"mirc","mysql":"sql","ocaml":"text\/x-ocaml","pascal":"text\/x-pascal","perl":"perl","perl6":"perl","plsql":"sql","properties":"text\/x-properties","q":"text\/x-q","scala":"scala","scheme":"text\/x-scheme","tcl":"text\/x-tcl","vb":"text\/x-vb","verilog":"text\/x-verilog","yaml":"text\/x-yaml","z80":"text\/x-z80"}