Re: Re: SiteLinkMap

From NAT, 6 Years ago, written in Bash, viewed 603 times. This paste is a reply to Re: SiteLinkMap from NAT - view diff
URL https://code.nat.moe/view/8ca701a3 Embed
Download Paste or View Raw
  1. #!/bin/bash
  2.  
  # --- Global state and configuration --------------------------------------
  # TABS          current recursion depth of the crawl (0 = not started).
  # temp          scratch directory; the crawl mirrors the link graph as
  #               nested directories inside it and keeps the visited list
  #               in "$temp/DONE".
  # GLOBAL_IGNORE extended regex; hosts matching it are never followed.
  # MAX_DEPTH     optional environment variable: maximum crawl depth as a
  #               positive integer (default 50). It is stored incremented by
  #               one because depth comparisons elsewhere use TABS < MAX_DEPTH.
  TABS=0

  GLOBAL_IGNORE='wordpress|twitter|facebook|baidu|google'

  # Validate MAX_DEPTH before creating anything on disk, so a bad value does
  # not leave a stray temp directory behind.
  if [[ -z ${MAX_DEPTH:-} ]]; then
    MAX_DEPTH=51
  elif [[ $MAX_DEPTH =~ ^[0-9]+$ ]] && (( 10#$MAX_DEPTH > 0 )); then
    MAX_DEPTH=$(( 10#$MAX_DEPTH + 1 ))
  else
    printf 'MAX_DEPTH must be a positive integer, got: %s\n' "$MAX_DEPTH" >&2
    exit 1
  fi

  temp=$(mktemp -d) || { printf 'mktemp failed\n' >&2; exit 1; }
  # Remove the scratch directory on every exit path, including interruption.
  # Traps are reset in subshells, so pipeline stages do not trigger this.
  trap 'rm -rf -- "$temp"' EXIT
  cd "$temp" || exit 1
  11.  
  #######################################
  # Print the distinct hosts that a page links to.
  # Fetches the page with curl (10 s limit, redirects followed) and extracts
  # the host part of every absolute http(s) href found in it.
  # Globals:   TABS (read), MAX_DEPTH (read)
  # Arguments: $1 - URL or host name to fetch
  # Outputs:   unique hosts on stdout, one per line, sorted; nothing when
  #            TABS has reached MAX_DEPTH or the page has no absolute links
  # Returns:   0 when the depth limit short-circuits; otherwise the status
  #            of the final sort
  #######################################
  getLinksByURL() {
    # Depth limit reached: do not even fetch the page.
    (( TABS == MAX_DEPTH )) && return 0

    # grep -o emits every match separately, so several links on one line
    # (typical for minified HTML) are all reported, and hrefs that are not
    # absolute http(s) URLs never produce empty or bogus host lines.
    curl --max-time 10 -sL "$1" \
      | grep -oE "href=[\"']?https?://[^/\"' >?#]+" \
      | sed -E "s|^href=[\"']?https?://||" \
      | sort -u
  }
  16.  
  #######################################
  # Print an indentation prefix of two spaces per level, without a newline.
  # Arguments: $1 - number of levels (a missing or empty value counts as 0)
  # Outputs:   2 * $1 spaces on stdout
  #######################################
  showTabs() {
    # A builtin arithmetic loop avoids the external seq, whose behaviour for
    # "seq 1 0" differs between implementations, and keeps the counter local.
    local levels=${1:-0} i
    for (( i = 0; i < levels; i++ )); do
      printf '  '
    done
  }
  23.  
  #######################################
  # Recursively print the sites a site links out to, mirroring the link graph
  # as nested directories below the current directory.
  # Globals:   TABS (incremented in the calling shell), MAX_DEPTH (read),
  #            GLOBAL_IGNORE (read), temp (read; "$temp/DONE" is the
  #            '|'-separated list of sites already handled, appended to here)
  # Arguments: $1 - host to expand
  # Outputs:   one "[depth - path] >>> host" line per followed link on stdout
  # Returns:   0 on success, 1 if $1 is empty or its directory cannot be
  #            entered
  # Side effects: creates directory $1 and leaves the calling shell inside it.
  #######################################
  showSiteLinkOut() {
    local site=$1 out seen rel
    local -a visited

    [[ -n $site ]] || return 1

    TABS=$(( TABS + 1 ))
    mkdir "$site" 2> /dev/null   # the directory may already exist
    # Bail out instead of crawling from the wrong directory.
    cd "$site" || return 1

    # Record the site exactly once, even if it turns out to have no links,
    # so it is never expanded again from another branch.
    printf '|%s' "$site" >> "$temp/DONE"

    # At the depth limit the directory is still created, but nothing is
    # followed any further.
    (( TABS < MAX_DEPTH )) || return 0

    rel=${PWD#"$temp"}

    getLinksByURL "$site" | while IFS= read -r out; do
      [[ -n $out ]] || continue
      grep -qE -- "$GLOBAL_IGNORE" <<< "$out" && continue

      # Re-read the visited list on every link: recursive calls append to it.
      # Substring comparison keeps the "same site" semantics of the list
      # (a.example also covers www.a.example) without treating dots and other
      # characters in host names as regex operators.
      IFS='|' read -r -a visited < "$temp/DONE"
      for seen in "${visited[@]}"; do
        [[ -n $seen && $out == *"$seen"* ]] && continue 2
      done

      printf '%s\n' "[$TABS - $rel] >>> $out"
      # The subshell restores TABS and the working directory automatically
      # once the nested crawl returns, whatever its outcome.
      ( showSiteLinkOut "$out" )
    done
    return 0
  }
  34.  
  # --- Entry point -----------------------------------------------------------
  # Usage: script <host>
  # Crawls outward from <host>, prints the resulting directory tree with
  # tree(1), then removes the scratch directory.
  if [[ -z ${1:-} ]]; then
    printf 'usage: %s <host>\n' "${0##*/}" >&2
    # Do not leave the scratch directory behind when there is nothing to do.
    if [[ -n ${temp:-} ]]; then
      rm -rf -- "$temp"
    fi
    exit 2
  fi

  echo "working at $temp"
  echo ">>> TRACE to $1 START"
  # Seed the visited list with the start site (no trailing newline: the list
  # is a single '|'-separated line).
  printf '%s' "$1" > "$temp/DONE"
  showSiteLinkOut "$1"
  # The crawl leaves the shell inside the site directory; go back to the root
  # of the scratch tree before printing and deleting it.
  cd "$temp" || exit 1
  tree -d "$1"
  # ${temp:?} aborts instead of running rm on an empty path.
  rm -r -- "${temp:?}"

Replies to Re: Re: SiteLinkMap rss

Title Name Language When
Re: Re: Re: SiteLinkMap NAT bash 6 Years ago.

Reply to "Re: Re: SiteLinkMap"

Here you can reply to the paste above

captcha