scraper.sh: Finish refactor

pull/57/merge
Jesus Alvarez 9 years ago
parent f0394ee276
commit ea91c5605d
  1. 178
      scraper.sh

@ -1,17 +1,35 @@
#!/bin/bash #!/bin/bash -e
DIR="$( cd "$( dirname "$0" )" && pwd )"
source $DIR/lib.sh #
source $DIR/conf.sh # A script for scraping data from the web. When run in cron with a correct email address configured, an alert email will be
# sent notifying the user that either the "linux" kernel package version has changed, a new ZFSonLinux version has been
# released, or a new archiso has been released.
#
NAME=$(basename $0)
# Resolve the directory this script lives in so that its companion files can
# be sourced no matter what the caller's working directory is.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Load the helper library first. A failed `source` inside an `if !` condition
# does not trip `-e`, so bail out explicitly instead of carrying on without
# the helpers. Plain echo is used here because nothing has been loaded yet.
if ! source "${SCRIPT_DIR}/lib.sh"; then
    echo "!! ERROR !! -- Could not load lib.sh!"
    exit 1
fi

# Load the configuration. `error` is presumably provided by lib.sh (TODO
# confirm); the explicit exit guarantees the script stops here even if that
# helper only logs.
if ! source "${SCRIPT_DIR}/conf.sh"; then
    error "Could not load conf.sh!"
    exit 1
fi
trap 'trap_abort' INT QUIT TERM HUP trap 'trap_abort' INT QUIT TERM HUP
trap 'trap_exit' EXIT trap 'trap_exit' EXIT
usage() { usage() {
echo "scraper.sh - A cheap webpage scraper." echo "${NAME} - A cheap webpage scraper."
echo echo
echo "Usage: scraper.sh [options]" echo "Usage: ${NAME} [options]"
echo echo
echo "Options:" echo "Options:"
echo echo
@ -19,26 +37,30 @@ usage() {
echo " -n: Dry run." echo " -n: Dry run."
echo " -d: Show debug info." echo " -d: Show debug info."
echo echo
echo "Examples:" echo "Examples:"
echo echo
echo " scraper.sh -d :: Show debug output." echo " ${NAME} -d :: Show debug output."
echo " scraper.sh -n :: Don't run commands, but show output." echo " ${NAME} -n :: Don't run commands, but show output."
trap - EXIT # Prevents exit log output
} }
ARGS=("$@") ARGS=("$@")
for (( a = 0; a < $#; a++ )); do for (( a = 0; a < $#; a++ )); do
if [[ ${ARGS[$a]} == "-h" ]]; then if [[ ${ARGS[$a]} == "-n" ]]; then
usage;
exit 0;
elif [[ ${ARGS[$a]} == "-n" ]]; then
DRY_RUN=1 DRY_RUN=1
elif [[ ${ARGS[$a]} == "-d" ]]; then elif [[ ${ARGS[$a]} == "-d" ]]; then
DEBUG=1 DEBUG=1
elif [[ ${ARGS[$a]} == "-h" ]]; then
usage;
exit 0;
fi fi
done done
CHECK_WEBPAGE_RETVAL=0 CHECK_WEBPAGE_RETVAL=0
check_webpage() { check_webpage() {
# $1: The url to scrape # $1: The url to scrape
# $2: The Perl regex to match with # $2: The Perl regex to match with
@ -46,105 +68,117 @@ check_webpage() {
debug "Checking webpage: $1" debug "Checking webpage: $1"
debug "Using regex: `printf "%q" "$2"`" debug "Using regex: `printf "%q" "$2"`"
debug "Expecting: $3" debug "Expecting: $3"
PAGE=""
if [[ $DEBUG == 1 ]]; then run_cmd_no_output "curl -sL ${1}"
PAGE=$(curl -vsL "${1}"; echo "RETVAL: $?")
else if [[ ${DRY_RUN} -eq 1 ]]; then
PAGE=$(curl -sL "${1}"; echo "RETVAL: $?") return
fi fi
if [[ $(echo $PAGE | grep -q "504 Gateway Timeout"; echo $?) == 0 ]]; then
# error "IN HERE YO 1" if [[ $(echo ${RUN_CMD_OUTPUT} | \grep -q "504 Gateway Timeout"; echo $?) -eq 0 ]]; then
CHECK_WEBPAGE_RETVAL=-1 CHECK_WEBPAGE_RETVAL=-1
return return
elif [[ $(echo $PAGE | grep -q "503 Service Unavailable"; echo $?) == 0 ]]; then elif [[ $(echo ${RUN_CMD_OUTPUT} | \grep -q "503 Service Unavailable"; echo $?) -eq 0 ]]; then
# error "IN HERE YO 2"
CHECK_WEBPAGE_RETVAL=-1 CHECK_WEBPAGE_RETVAL=-1
return return
elif [[ $PAGE == "RETVAL: 7" ]]; then elif [[ ${RUN_CMD_OUTPUT} == "RETVAL: 7" ]]; then
# error "IN HERE YO 3"
CHECK_WEBPAGE_RETVAL=-1 CHECK_WEBPAGE_RETVAL=-1
return return
fi fi
# debug "Page: ${PAGE}"
SCRAPED_STRING=$(echo "${PAGE}" | \grep -Po -m 1 "${2}") SCRAPED_STRING=$(echo "${RUN_CMD_OUTPUT}" | \grep -Po -m 1 "${2}")
debug "Got \"$SCRAPED_STRING\" from webpage." debug "Got \"${SCRAPED_STRING}\" from webpage."
if [[ $SCRAPED_STRING != "$3" ]]; then
error "PAGE: $PAGE" if [[ ${SCRAPED_STRING} != "$3" ]]; then
error "Checking \"$1\" expected \"$3\" got \"$SCRAPED_STRING\"" error "Checking '$1' expected '$3' got '${SCRAPED_STRING}'"
debug "Returning 1 from check_webpage()" debug "Returning 1 from check_webpage()"
CHECK_WEBPAGE_RETVAL=1 CHECK_WEBPAGE_RETVAL=1
return return
fi fi
CHECK_WEBPAGE_RETVAL=0 CHECK_WEBPAGE_RETVAL=0
return return
} }
check_result() { check_result() {
# $1 current line # $1 current line
# $2 changed line # $2 changed line
if [[ $CHECK_WEBPAGE_RETVAL == 0 ]]; then if [[ ${CHECK_WEBPAGE_RETVAL} -eq 0 ]]; then
msg2 "The $1 version is current." msg2 "The $1 version is current."
elif [[ $CHECK_WEBPAGE_RETVAL == 1 ]]; then elif [[ ${CHECK_WEBPAGE_RETVAL} -eq 1 ]]; then
error "The $2 is out-of-date!" error "The $2 is out-of-date!"
HAS_ERROR=1 HAS_ERROR=1
elif [[ $CHECK_WEBPAGE_RETVAL == -1 ]]; then elif [[ ${CHECK_WEBPAGE_RETVAL} -eq -1 ]]; then
warning "The $2 package page was unreachable!" warning "The $2 package page was unreachable!"
else else
error "Check returned $CHECK_WEBPAGE_RETVAL" error "Check returned ${CHECK_WEBPAGE_RETVAL}"
HAS_ERROR=1 HAS_ERROR=1
fi fi
} }
HAS_ERROR=0 HAS_ERROR=0
# Bail if no internet # Bail if no internet
# Please thank Comcast for this requirement...
if [[ $(ping -w 1 -c 1 8.8.8.8 &> /dev/null; echo $?) != 0 ]]; then if [[ $(ping -w 1 -c 1 8.8.8.8 &> /dev/null; echo $?) != 0 ]]; then
exit 0; exit 0;
fi fi
msg "scraper.sh started..."
# msg "${NAME} started..."
# Check archiso kernel version (this will change when the archiso is updated)
#
msg "Checking archiso download page for linux kernel version changes..."
check_webpage "https://www.archlinux.org/download/" "(?<=Included Kernel:</strong> )[\d\.]+" "$AZB_KERNEL_ARCHISO_VERSION"
check_result "archiso kernel version" "archiso"
#
# Check i686 linux kernel version
#
msg "Checking the online package database for i686 linux kernel version changes..."
check_webpage "https://www.archlinux.org/packages/core/i686/linux/" "(?<=<h2>linux )[\d\.-]+(?=</h2>)" "$AZB_GIT_KERNEL_X32_VERSION"
check_result "i686 linux kernel package" "linux i686"
# check_archiso() {
# Check x86_64 linux kernel version #
# # Check archiso kernel version (this will change when the archiso is updated)
msg "Checking the online package database for x86_64 linux kernel version changes..." #
check_webpage "https://www.archlinux.org/packages/core/x86_64/linux/" "(?<=<h2>linux )[\d\.-]+(?=</h2>)" "$AZB_GIT_KERNEL_X64_VERSION" msg "Checking archiso download page for linux kernel version changes..."
check_result "x86_64 linux kernel package" "linux x86_64" check_webpage "https://www.archlinux.org/download/" "(?<=Included Kernel:</strong> )[\d\.]+" \
"${AZB_ARCHISO_KERNEL_VERSION}"
check_result "archiso kernel version" "archiso"
}
#
# Check i686 linux-lts kernel version
#
msg "Checking the online package database for i686 linux-lts kernel version changes..."
check_webpage "https://www.archlinux.org/packages/core/i686/linux-lts/" "(?<=<h2>linux-lts )[\d\.-]+(?=</h2>)" "$AZB_LTS_KERNEL_X32_VERSION"
check_result "i686 linux-lts kernel package" "linux-lts i686"
# check_linux_kernel() {
# Check x86_64 linux-lts kernel version #
# # Check x86_64 linux kernel version
msg "Checking the online package database for x86_64 linux-lts kernel version changes..." #
check_webpage "https://www.archlinux.org/packages/core/x86_64/linux-lts/" "(?<=<h2>linux-lts )[\d\.-]+(?=</h2>)" "$AZB_LTS_KERNEL_X64_VERSION" msg "Checking the online package database for x86_64 linux kernel version changes..."
check_result "x86_64 linux-lts kernel package" "linux-lts x86_64" check_webpage "https://www.archlinux.org/packages/core/x86_64/linux/" "(?<=<h2>linux )[\d\.-]+(?=</h2>)" \
"${AZB_DEF_KERNEL_VERSION}"
check_result "x86_64 linux kernel package" "linux x86_64"
}
# Compare the x86_64 linux-lts version shown in the Arch Linux package
# database with AZB_LTS_KERNEL_VERSION and report the outcome.
#
# Globals:   AZB_LTS_KERNEL_VERSION (read)
# Arguments: none
check_linux_lts_kernel() {
    local page_url="https://www.archlinux.org/packages/core/x86_64/linux-lts/"
    # Perl regex: the version text between "<h2>linux-lts " and "</h2>".
    local version_regex='(?<=<h2>linux-lts )[\d\.-]+(?=</h2>)'

    msg "Checking the online package database for x86_64 linux-lts kernel version changes..."
    check_webpage "${page_url}" "${version_regex}" "${AZB_LTS_KERNEL_VERSION}"
    check_result "x86_64 linux-lts kernel package" "linux-lts x86_64"
}
# Check zfsonlinux.org for a new stable release.
#
# Scrapes the version out of an spl tarball link of the form
# "downloads/zfsonlinux/spl/spl-<version>.tar.gz", compares it with
# AZB_ZOL_VERSION and reports the outcome through check_result.
#
# Globals:   AZB_ZOL_VERSION (read)
# Arguments: none
check_zol_version() {
    msg "Checking zfsonlinux.org for new versions..."
    # The dots of ".tar.gz" are escaped so the lookahead matches only the
    # literal suffix; an unescaped "." would accept any character there.
    check_webpage "http://zfsonlinux.org/" \
        "(?<=downloads/zfsonlinux/spl/spl-)[\d\.]+(?=\.tar\.gz)" \
        "${AZB_ZOL_VERSION}"
    check_result "ZOL stable version" "ZOL stable version"
}
# Run every check in order. Each function reports through check_result, which
# sets HAS_ERROR=1 when a version is out-of-date or a check returns an
# unexpected value.
check_archiso
check_linux_kernel
check_linux_lts_kernel
check_zol_version
#
# Check ZFSonLinux.org
#
msg "Checking zfsonlinux.org for new versions..."
check_webpage "http://zfsonlinux.org/" "(?<=downloads/zfsonlinux/spl/spl-)[\d\.]+(?=.tar.gz)" "$AZB_ZOL_VERSION"
check_result "ZOL stable version" "ZOL stable version"
# #
# This is the end # This is the end
@ -152,6 +186,6 @@ check_result "ZOL stable version" "ZOL stable version"
# This is the end # This is the end
# My only friend, the end # My only friend, the end
# #
if [[ $HAS_ERROR -eq 1 ]]; then if [[ ${HAS_ERROR} -eq 1 ]]; then
exit 1; exit 1;
fi fi

Loading…
Cancel
Save