Re: SiteLinkMap

From NAT, 6 Years ago, written in Bash, viewed 619 times. This paste is a reply to SiteLinkMap from NAT - view diff
URL https://code.nat.moe/view/7f0fc2ee Embed
Download Paste or View Raw
  1. #!/bin/bash
  2.  
  3. TABS=0
  4.  
  5. temp=$(mktemp -d)
  6. cd $temp
  7.  
  8. GLOBAL_IGNORE='wordpress|twitter|facebook|baidu|google'
  9.  
  10. [[ -z $MAX_DEPTH ]] && MAX_DEPTH=50
  11.  
  12. function getLinksByURL {
  13.         curl --max-time 10 -sL "$1" | awk -F'href=' '{print $2}' | awk -F'>| ' '{print $1}' | sort | uniq | grep 'http' | awk -F"/" '{print $3}' | tr -d "\"'" | sort | uniq
  14. }
  15.  
  16. function showTabs {
  17.         for i in $(seq 1 $1)
  18.         do
  19.                 echo -n "  "
  20.         done
  21. }
  22.  
  23. function showSiteLinkOut {
  24.         let TABS++
  25.         mkdir "$1" 2> /dev/null; cd "$1"
  26.         getLinksByURL "$1" | grep -vE "$1|$(cat $temp/DONE)" | while read -r out
  27.         do
  28.                 echo -n "|$1" >> $temp/DONE
  29.                 [[ $TABS -lt $MAX_DEPTH && -z $(grep -E "$(cat $temp/DONE)" <<< "$out") && ! "$1" == "$out" && -z $(grep -E "$GLOBAL_IGNORE" <<< "$out") && ! "$1" == "$out" ]] && echo "[$TABS - $(pwd | awk -F"$temp" '{print $2}')] >>> $out" && showSiteLinkOut "$out" && let TABS-- && cd ..
  30.                 echo 0 > /dev/null
  31.         done
  32. }
  33.  
  34. echo "working at $temp"
  35. echo ">>> TRACE to $1 START"
  36. echo -n "$1" > $temp/DONE
  37. showSiteLinkOut $1
  38. cd $temp
  39. tree -d $1 > "$(dirname "$0")"/tree.txt

Replies to Re: SiteLinkMap rss

Title Name Language When
Re: Re: SiteLinkMap NAT bash 6 Years ago.

Reply to "Re: SiteLinkMap"

Here you can reply to the paste above

captcha