#!/bin/bash
# BiliBili Analyzer: Analyze the relationship between bilibili users
# UID the crawl begins with when no UID is passed to crawl.
START_UID='4851356'
# Suffix appended to a UID to name the file holding its downloaded page.
TEMP_EXT='.tmp'
# Suffix appended to a UID to name the file listing its recorded followers.
CRAWLED_EXT='.crawled'
# File the follow(...) facts are appended to.
OUTPUT_FILE='bilibili.prolog'
# Number of the follower-list page to request.
PAGE='1'
#######################################
# Recursively crawl the followers of a bilibili user.
#
# Fetches the user's follower listing page by page. For every follower that
# has not been recorded for this user yet, it appends the fact
# "follow(FOLLOWER,USER)." to $OUTPUT_FILE, records the follower in
# "<USER>$CRAWLED_EXT" and then crawls that follower in turn.
#
# Globals:
#   START_UID    (read) UID used when no argument is given
#   PAGE         (read) first page of the listing to request
#   TEMP_EXT     (read) suffix of the file the downloaded page is saved to
#   CRAWLED_EXT  (read) suffix of the per-user file of recorded followers
#   OUTPUT_FILE  (read) file the facts are appended to
# Arguments:
#   $1 - UID to crawl (optional; defaults to $START_UID)
# Outputs:
#   One progress line per requested page on stdout.
# Returns:
#   0 as soon as a page yields no followers or a follower that is already
#   recorded for this user is encountered.
#######################################
crawl() {
  # All state is local: crawl calls itself, and with shared globals the nested
  # call would overwrite the UID and page of the loop that is still running.
  local uid=${1:-$START_UID}
  local page=${PAGE:-1}
  local page_file=$uid$TEMP_EXT
  local crawled_file=$uid$CRAWLED_EXT
  local followers follower

  while true; do
    echo "[$(date)] Crwaling $uid..."

    # The URL is quoted so the "?" is never treated as a glob character.
    curl -s "http://space.bilibili.com/$uid/fans.html?page=$page" > "$page_file"

    # Keep lines linking to a user space, take the 6th double-quote-delimited
    # field (the link), and cut the UID out of its path.
    followers=$(grep -F 'http://space.bilibili.com/' "$page_file" \
      | awk -F'"' '{print $6}' \
      | grep html \
      | awk -F/ '{print $4}')

    # No followers: past the last page, or the user has none.
    if [[ -z $followers ]]; then
      return 0
    fi

    # Intentionally unquoted: the list is whitespace-separated UIDs.
    for follower in $followers; do
      # Whole-line literal match; a substring match would consider UID 2
      # as recorded as soon as UID 12 is in the file.
      if grep -qxF -- "$follower" "$crawled_file" 2> /dev/null; then
        return 0
      fi
      echo "follow($follower,$uid)." >> "$OUTPUT_FILE"
      echo "$follower" >> "$crawled_file"
      crawl "$follower"
    done

    # Actually advance to the next page of this user's listing.
    page=$((page + 1))
  done
}
#######################################
# Print the number of the page that follows $PAGE.
# Globals:
#   PAGE (read) current page number; expected to be an integer
# Outputs:
#   PAGE + 1 on stdout
# Note:
#   PAGE itself is not modified; a caller that wants to advance has to assign
#   the printed value.
#######################################
nextpage() {
  # Shell arithmetic instead of piping through bc: no external process needed.
  printf '%s\n' "$((PAGE + 1))"
}
# Start every run from a clean slate: remove the crawl marks and downloaded
# pages a previous run left in the current directory. -f keeps rm quiet when
# nothing matches; the non-empty check stops an empty or unset suffix from
# turning the glob into a bare "*".
if [[ -n ${CRAWLED_EXT:-} ]]; then
  rm -f -- *"$CRAWLED_EXT"
fi
if [[ -n ${TEMP_EXT:-} ]]; then
  rm -f -- *"$TEMP_EXT"
fi
# Run the script's arguments as a command (e.g. "crawl 4851356").
# "$@" keeps every argument as its own word instead of re-splitting them.
"$@"
# BiliBili Followers crawl
# From NAT, 7 Years ago, written in Bash, viewed 987 times.
# This paste is a reply to BiliBili Followers crawl from NAT
# - go back
# URL https://code.nat.moe/view/7e51943c/diff
# Embed
# Viewing differences between BiliBili Followers crawl and BiliBili Followers crawl
# — Expand Paste to full width of browser