wenku8_fetch_te

From NAT, 6 Years ago, written in Bash, viewed 787 times. This paste is a reply to wenku8_fetch_se from NAT - view diff
URL https://code.nat.moe/view/b01af357 Embed
Download Paste or View Raw
  #!/bin/bash
  # A tool to download all novels on wenku8.com, the third edition.
  # - Added support for multi process
  #
  # Walks the listing pages FROM..TO, extracts one "<id> <title>" entry per
  # novel and downloads each novel's text file into STORE_PATH. Downloads run
  # as background jobs in batches of THREADS.
  # Requires: bash, curl, iconv, grep, sed, awk, tr.

  STORE_PATH=./save/
  FETCH_URL='http://www.wenku8.com/modules/article/articlelist.php?page='
  DOWNLOAD_TYPE="utf8"
  # __K and __ID are placeholders filled in per novel. __K is always set to 1
  # below; when the server answers "404 Not Found" for that path the download
  # is retried through DOWNLOAD_FALLBACK.
  DOWNLOAD_URL="http://dl.wenku8.com/txt$DOWNLOAD_TYPE/__K/__ID.txt"
  DOWNLOAD_FALLBACK="http://dl.wenku8.com/down.php?type=$DOWNLOAD_TYPE&id="
  FROM=1
  TO=93
  TIMESTEMP_FORMAT="%H:%M:%S"
  THREADS=20

  # Print a message to stdout prefixed with the current time.
  log() {
    printf '[%s] %s\n' "$(date "+$TIMESTEMP_FORMAT")" "$*"
  }

  #######################################
  # Download a single novel.
  # Globals:   STORE_PATH, DOWNLOAD_URL, DOWNLOAD_FALLBACK (read)
  # Arguments: $1 - listing entry of the form "<id> <title>"
  # Outputs:   progress messages on stdout; the novel text is written to
  #            "$STORE_PATH/<id>_<title>.txt"
  # Returns:   0 on success or when the entry is empty, 1 when the fallback
  #            yields no redirect target
  #######################################
  download_novel() {
    local entry=$1
    local -a words
    local novel_id display_name file_name save_path url location

    # Split on whitespace so runs of blanks collapse, as the unquoted echo in
    # earlier editions did.
    read -r -a words <<<"$entry"
    [[ ${#words[@]} -gt 0 ]] || return 0

    # The first word is the id; strip a leftover ".htm..." in case the link
    # markup did not match the title rewrite exactly.
    novel_id=${words[0]%%.htm*}
    [[ -n $novel_id ]] || return 0

    display_name="${words[*]}"
    file_name=$(IFS=_; printf '%s' "${words[*]}")
    # A "/" in a title would otherwise be taken as a directory separator.
    file_name=${file_name//\//_}
    save_path="${STORE_PATH%/}/${file_name}.txt"

    url=${DOWNLOAD_URL/__K/1}
    url=${url/__ID/$novel_id}

    log "Downloading $display_name"
    curl "$url" > "$save_path" 2> /dev/null

    if grep -q '404 Not Found' "$save_path"; then
      log "received 404 Not Found, trying fallback for $display_name"
      # Header lines end in CR and header names are case-insensitive.
      location=$(curl -I "$DOWNLOAD_FALLBACK$novel_id" 2> /dev/null \
        | tr -d '\r' \
        | awk 'tolower($1) == "location:" { print $2; exit }')
      if [[ -z $location ]]; then
        # Without a redirect target there is nothing to fetch; do not keep
        # the 404 page around as if it were the novel.
        rm -f -- "$save_path"
        log "fallback gave no redirect for $display_name"
        return 1
      fi
      case $location in
        http://* | https://*) url=$location ;;
        *) url="http://dl.wenku8.com$location" ;;
      esac
      curl "$url" > "$save_path" 2> /dev/null
      log "fallback done for $display_name"
    else
      log "Done $display_name"
    fi
  }

  mkdir -p -- "$STORE_PATH" || {
    printf 'cannot create %s\n' "$STORE_PATH" >&2
    exit 1
  }

  running=0
  for ((page = FROM; page <= TO; page++)); do
    log "Starting page $page"

    # Read the whole listing before starting any job, so each job gets its
    # own entry as an argument and no shared temp file is needed.
    entries=()
    while IFS= read -r entry; do
      entries+=("$entry")
    done < <(
      curl "$FETCH_URL$page" 2> /dev/null \
        | iconv -f gbk -t utf-8 \
        | tr -d '\r' \
        | grep 'font-size:13px;' \
        | sed -e 's/.*book\///g; s/\.htm">/ /g; s/<\/a><\/b>//g;'
    )

    if [[ ${#entries[@]} -eq 0 ]]; then
      log "no novels found on page $page"
      continue
    fi

    for entry in "${entries[@]}"; do
      download_novel "$entry" &
      # Batch barrier: once THREADS jobs have been started, wait for all of
      # them before starting more.
      if ((++running == THREADS)); then
        running=0
        wait
      fi
    done
  done

  # Let the final, partially filled batch finish before exiting.
  wait

Replies to wenku8_fetch_te rss

Title Name Language When
wenku8_fetch_te_fix1 NAT bash 6 Years ago.

Reply to "wenku8_fetch_te"

Here you can reply to the paste above

captcha