SiteLinkMap

From NAT, 6 Years ago, written in Bash, viewed 538 times.
URL https://code.nat.moe/view/a6f4ebfa Embed
Download Paste or View Raw
  1. #!/bin/bash
  2.  
  3. TABS=0
  4.  
  5. temp=$(mktemp -d)
  6. cd $temp
  7.  
  8. GLOBAL_IGNORE='wordpress|twitter|facebook|baidu|google'
  9.  
  10. [[ -z $MAX_DEPTH ]] && MAX_DEPTH=50
  11.  
  12. function getLinksByURL {
  13.         curl --max-time 10 -sL "$1" | awk -F'href=' '{print $2}' | awk -F'>| ' '{print $1}' | sort | uniq | grep 'http' | awk -F"/" '{print $3}' | tr -d "\"'" | sort | uniq
  14. }
  15.  
  16. function showTabs {
  17.         for i in $(seq 1 $1)
  18.         do
  19.                 echo -n "  "
  20.         done
  21. }
  22.  
  23. function showSiteLinkOut {
  24.         let TABS++
  25.         mkdir "$1" 2> /dev/null; cd "$1"
  26.         getLinksByURL "$1" | grep -vE "$1|$DONE" | while read -r out
  27.         do
  28.                 DONE="$DONE|$1"
  29.                 [[ $TABS -lt $MAX_DEPTH && -z $(grep -E "$DONE" <<< "$out") && ! "$1" == "$out" && -z $(grep -E "$GLOBAL_IGNORE" <<< "$out") && ! "$1" == "$out" ]] && echo "[$TABS]$(showTabs $TABS)Getting into $out" && showSiteLinkOut "$out" && echo "[$TABS]$(showTabs $TABS)Getting out of $out" &&let TABS-- && cd ..
  30.                 echo 0 > /dev/null
  31.         done
  32. }
  33.  
  34. echo "working at $temp"
  35. echo ">>> TRACE to $1 START"
  36. DONE=$1
  37. showSiteLinkOut $1
  38. cd $temp
  39. tree -d $1

Replies to SiteLinkMap rss

Title Name Language When
Re: SiteLinkMap NAT bash 6 Years ago.
Re: SiteLinkMap NAT bash 6 Years ago.

Reply to "SiteLinkMap"

Here you can reply to the paste above

captcha