Re: SiteLinkMap

From NAT, 6 Years ago, written in Bash, viewed 546 times. This paste is a reply to SiteLinkMap from NAT - view diff
URL https://code.nat.moe/view/65f7e828 Embed
Download Paste or View Raw
  1. #!/bin/bash
  2. # Seriously, if you know BaSH well, DO NOT read this code.
  3. # You're gonna kill yourself, or maybe kill me, if you do.
  4. # --- STOP READING THE CODE ---
  5.  
  # Current recursion depth of showSiteLinkOut; also passed to showTabs to
  # size the indentation of each trace line.
  TABS=0

  # Scratch directory: holds the DONE list and the directory tree that mirrors
  # the crawl. Abort if it cannot be created or entered, otherwise every
  # mkdir/cd below would operate on whatever directory we happen to be in.
  temp=$(mktemp -d) || { echo "cannot create temporary directory" >&2; exit 1; }
  cd "$temp" || { echo "cannot cd to $temp" >&2; exit 1; }

  # Extended regex; any discovered host matching it is never followed.
  GLOBAL_IGNORE='wordpress|facebook|baidu|google'

  # Maximum recursion depth; an existing non-empty MAX_DEPTH is respected.
  : "${MAX_DEPTH:=50}"
  14.  
  #######################################
  # Print the distinct host names that a page links to, one per line, sorted.
  # Arguments: $1 - URL (or bare host) to download with curl
  # Outputs:   host part of every href value containing "http", to stdout
  #######################################
  getLinksByURL() {
    # Emit one line per "href=" occurrence, so that pages with several links
    # on a single line (e.g. minified HTML) contribute all of them, not just
    # the first. The value is then cut at the first '>' or space, stripped of
    # quotes, and reduced to the third '/'-separated field (the host).
    # Empty hosts are dropped.
    curl --max-time 10 -sL "$1" \
      | awk -F'href=' '{ for (i = 2; i <= NF; i++) print $i }' \
      | awk -F'>| ' '{ print $1 }' \
      | grep 'http' \
      | tr -d "\"'" \
      | awk -F'/' '$3 != "" { print $3 }' \
      | sort -u
  }
  18.  
  #######################################
  # Print an indentation prefix of two spaces per level, without a newline.
  # Arguments: $1 - number of levels (missing, zero or negative prints nothing)
  # Outputs:   2 * $1 spaces to stdout
  #######################################
  showTabs() {
    local levels=${1:-0}
    # Guard first: a negative width would make printf pad instead of
    # printing nothing.
    (( levels > 0 )) || return 0
    # One printf pads an empty string to the full width; no external seq
    # process and no loop variable leaking into the global scope.
    printf '%*s' "$(( levels * 2 ))" ''
  }
  25.  
  #######################################
  # Recursively trace the sites reachable from a site, printing an indented
  # ">>> host" / "<<< host" trace and mirroring the walk as nested
  # directories below the current working directory.
  # Globals:   TABS (depth; incremented on entry, restored on return),
  #            MAX_DEPTH, GLOBAL_IGNORE (read), temp (read; $temp/DONE is a
  #            '|'-separated extended regex of sites already expanded)
  # Arguments: $1 - site to expand
  # Outputs:   trace lines to stdout
  # Returns:   always 0; the working directory is restored before returning
  #######################################
  showSiteLinkOut() {
    local site=$1 entered=0 marked=0 out

    TABS=$(( TABS + 1 ))

    # Enter (creating if needed) the directory that mirrors this site.
    # Remember whether that worked, so the matching "cd .." below only runs
    # when we really descended and never climbs out of the tree.
    if [[ -n $site ]] && { mkdir -- "$site" 2> /dev/null; cd -- "$site" 2> /dev/null; }; then
      entered=1
    else
      printf 'showSiteLinkOut: cannot enter a directory for "%s"\n' "$site" >&2
    fi

    # The loop is the last stage of a pipeline and therefore runs in a
    # subshell: TABS and cwd changes made by nested calls stay in there.
    getLinksByURL "$site" \
      | grep -vE -- "$site|$(cat "$temp/DONE")" \
      | while read -r out; do
          # Record this site as expanded once it has shown at least one
          # candidate link; once is enough, repeats only bloat the regex.
          if (( ! marked )); then
            printf '|%s' "$site" >> "$temp/DONE"
            marked=1
          fi

          [[ -n $out ]] || continue
          (( TABS < MAX_DEPTH )) || continue
          [[ $out != "$site" ]] || continue
          # DONE is re-read every time because nested calls append to it.
          grep -qE -- "$(cat "$temp/DONE")" <<< "$out" && continue
          grep -qE -- "$GLOBAL_IGNORE" <<< "$out" && continue

          echo "[$TABS]$(showTabs "$TABS")>>> $out"
          showSiteLinkOut "$out"
          echo
          echo "[$TABS]$(showTabs "$TABS")<<< $out"
        done

    if (( entered )); then
      cd ..
    fi
    TABS=$(( TABS - 1 ))
    return 0
  }
  36.  
  # Entry point: trace outward from the site given as the first argument,
  # then write the mirrored directory tree to tree.txt.
  if [[ -z ${1:-} ]]; then
    echo "usage: ${0##*/} <site>" >&2
    exit 2
  fi

  echo "working at $temp"
  echo ">>> TRACE to $1 START"
  # Seed the visited list with the start site so links back to it are skipped.
  printf '%s' "$1" > "$temp/DONE"
  showSiteLinkOut "$1"
  cd "$temp" || exit 1
  # NOTE(review): when the script is started through a relative path, $0 is
  # resolved against $temp at this point, so tree.txt may not land next to
  # the script - confirm the intended location.
  tree -d "$1" > "$(dirname "$0")"/tree.txt

Reply to "Re: SiteLinkMap"

Here you can reply to the paste above

captcha