From ea91c5605d59f8c8012f895ea5c71c8cf5d96ee0 Mon Sep 17 00:00:00 2001 From: Jesus Alvarez Date: Mon, 18 Apr 2016 18:32:34 -0700 Subject: [PATCH] scraper.sh: Finish refactor --- scraper.sh | 178 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 106 insertions(+), 72 deletions(-) diff --git a/scraper.sh b/scraper.sh index ded394c..6836214 100755 --- a/scraper.sh +++ b/scraper.sh @@ -1,17 +1,35 @@ -#!/bin/bash +#!/bin/bash -e -DIR="$( cd "$( dirname "$0" )" && pwd )" -source $DIR/lib.sh -source $DIR/conf.sh +# +# A script for scraping data from the web. When ran in cron with a correct email address configured, an alert email will be +# sent notifying the user that either the "linux" kernel package version has changed, a new ZFSonLinux version has been +# released, or a new archiso has been released. +# + + +NAME=$(basename $0) +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + + +if ! source ${SCRIPT_DIR}/lib.sh; then + echo "!! ERROR !! -- Could not load lib.sh!" +fi + + +if ! source ${SCRIPT_DIR}/conf.sh; then + error "Could not load conf.sh!" +fi + trap 'trap_abort' INT QUIT TERM HUP trap 'trap_exit' EXIT + usage() { - echo "scraper.sh - A cheap webpage scraper." + echo "${NAME} - A cheap webpage scraper." echo - echo "Usage: scraper.sh [options]" + echo "Usage: ${NAME} [options]" echo echo "Options:" echo @@ -19,26 +37,30 @@ usage() { echo " -n: Dry run." echo " -d: Show debug info." echo - echo "Examples:" + echo "Examples:" echo - echo " scraper.sh -d :: Show debug output." - echo " scraper.sh -n :: Don't run commands, but show output." + echo " ${NAME} -d :: Show debug output." + echo " ${NAME} -n :: Don't run commands, but show output." + trap - EXIT # Prevents exit log output } + ARGS=("$@") for (( a = 0; a < $#; a++ )); do - if [[ ${ARGS[$a]} == "-h" ]]; then - usage; - exit 0; - elif [[ ${ARGS[$a]} == "-n" ]]; then + if [[ ${ARGS[$a]} == "-n" ]]; then DRY_RUN=1 elif [[ ${ARGS[$a]} == "-d" ]]; then DEBUG=1 + elif [[ ${ARGS[$a]} == "-h" ]]; then + usage; + exit 0; fi done + CHECK_WEBPAGE_RETVAL=0 + check_webpage() { # $1: The url to scrape # $2: The Perl regex to match with @@ -46,105 +68,117 @@ check_webpage() { debug "Checking webpage: $1" debug "Using regex: `printf "%q" "$2"`" debug "Expecting: $3" - PAGE="" - if [[ $DEBUG == 1 ]]; then - PAGE=$(curl -vsL "${1}"; echo "RETVAL: $?") - else - PAGE=$(curl -sL "${1}"; echo "RETVAL: $?") + + run_cmd_no_output "curl -sL ${1}" + + if [[ ${DRY_RUN} -eq 1 ]]; then + return fi - if [[ $(echo $PAGE | grep -q "504 Gateway Timeout"; echo $?) == 0 ]]; then - # error "IN HERE YO 1" + + if [[ $(echo ${RUN_CMD_OUTPUT} | \grep -q "504 Gateway Timeout"; echo $?) -eq 0 ]]; then CHECK_WEBPAGE_RETVAL=-1 return - elif [[ $(echo $PAGE | grep -q "503 Service Unavailable"; echo $?) == 0 ]]; then - # error "IN HERE YO 2" + elif [[ $(echo ${RUN_CMD_OUTPUT} | \grep -q "503 Service Unavailable"; echo $?) -eq 0 ]]; then CHECK_WEBPAGE_RETVAL=-1 return - elif [[ $PAGE == "RETVAL: 7" ]]; then - # error "IN HERE YO 3" + elif [[ ${RUN_CMD_OUTPUT} == "RETVAL: 7" ]]; then CHECK_WEBPAGE_RETVAL=-1 return fi - # debug "Page: ${PAGE}" - SCRAPED_STRING=$(echo "${PAGE}" | \grep -Po -m 1 "${2}") - debug "Got \"$SCRAPED_STRING\" from webpage." - if [[ $SCRAPED_STRING != "$3" ]]; then - error "PAGE: $PAGE" - error "Checking \"$1\" expected \"$3\" got \"$SCRAPED_STRING\"" + + SCRAPED_STRING=$(echo "${RUN_CMD_OUTPUT}" | \grep -Po -m 1 "${2}") + debug "Got \"${SCRAPED_STRING}\" from webpage." + + if [[ ${SCRAPED_STRING} != "$3" ]]; then + error "Checking '$1' expected '$3' got '${SCRAPED_STRING}'" debug "Returning 1 from check_webpage()" CHECK_WEBPAGE_RETVAL=1 return fi + CHECK_WEBPAGE_RETVAL=0 return } + check_result() { # $1 current line # $2 changed line - if [[ $CHECK_WEBPAGE_RETVAL == 0 ]]; then + if [[ ${CHECK_WEBPAGE_RETVAL} -eq 0 ]]; then msg2 "The $1 version is current." - elif [[ $CHECK_WEBPAGE_RETVAL == 1 ]]; then + elif [[ ${CHECK_WEBPAGE_RETVAL} -eq 1 ]]; then error "The $2 is out-of-date!" HAS_ERROR=1 - elif [[ $CHECK_WEBPAGE_RETVAL == -1 ]]; then + elif [[ ${CHECK_WEBPAGE_RETVAL} -eq -1 ]]; then warning "The $2 package page was unreachable!" else - error "Check returned $CHECK_WEBPAGE_RETVAL" + error "Check returned ${CHECK_WEBPAGE_RETVAL}" HAS_ERROR=1 fi } + HAS_ERROR=0 + # Bail if no internet +# Please thank Comcast for this requirement... if [[ $(ping -w 1 -c 1 8.8.8.8 &> /dev/null; echo $?) != 0 ]]; then exit 0; fi -msg "scraper.sh started..." -# -# Check archiso kernel version (this will change when the archiso is updated) -# -msg "Checking archiso download page for linux kernel version changes..." -check_webpage "https://www.archlinux.org/download/" "(?<=Included Kernel: )[\d\.]+" "$AZB_KERNEL_ARCHISO_VERSION" -check_result "archiso kernel version" "archiso" +msg "${NAME} started..." -# -# Check i686 linux kernel version -# -msg "Checking the online package database for i686 linux kernel version changes..." -check_webpage "https://www.archlinux.org/packages/core/i686/linux/" "(?<=

linux )[\d\.-]+(?=

)" "$AZB_GIT_KERNEL_X32_VERSION" -check_result "i686 linux kernel package" "linux i686" -# -# Check x86_64 linux kernel version -# -msg "Checking the online package database for x86_64 linux kernel version changes..." -check_webpage "https://www.archlinux.org/packages/core/x86_64/linux/" "(?<=

linux )[\d\.-]+(?=

)" "$AZB_GIT_KERNEL_X64_VERSION" -check_result "x86_64 linux kernel package" "linux x86_64" +check_archiso() { + # + # Check archiso kernel version (this will change when the archiso is updated) + # + msg "Checking archiso download page for linux kernel version changes..." + check_webpage "https://www.archlinux.org/download/" "(?<=Included Kernel: )[\d\.]+" \ + "${AZB_ARCHISO_KERNEL_VERSION}" + check_result "archiso kernel version" "archiso" +} -# -# Check i686 linux-lts kernel version -# -msg "Checking the online package database for i686 linux-lts kernel version changes..." -check_webpage "https://www.archlinux.org/packages/core/i686/linux-lts/" "(?<=

linux-lts )[\d\.-]+(?=

)" "$AZB_LTS_KERNEL_X32_VERSION" -check_result "i686 linux-lts kernel package" "linux-lts i686" -# -# Check x86_64 linux-lts kernel version -# -msg "Checking the online package database for x86_64 linux-lts kernel version changes..." -check_webpage "https://www.archlinux.org/packages/core/x86_64/linux-lts/" "(?<=

linux-lts )[\d\.-]+(?=

)" "$AZB_LTS_KERNEL_X64_VERSION" -check_result "x86_64 linux-lts kernel package" "linux-lts x86_64" +check_linux_kernel() { + # + # Check x86_64 linux kernel version + # + msg "Checking the online package database for x86_64 linux kernel version changes..." + check_webpage "https://www.archlinux.org/packages/core/x86_64/linux/" "(?<=

linux )[\d\.-]+(?=

)" \ + "${AZB_DEF_KERNEL_VERSION}" + check_result "x86_64 linux kernel package" "linux x86_64" +} + + +check_linux_lts_kernel() { + # + # Check x86_64 linux-lts kernel version + # + msg "Checking the online package database for x86_64 linux-lts kernel version changes..." + check_webpage "https://www.archlinux.org/packages/core/x86_64/linux-lts/" "(?<=

linux-lts )[\d\.-]+(?=

)" \ + "${AZB_LTS_KERNEL_VERSION}" + check_result "x86_64 linux-lts kernel package" "linux-lts x86_64" +} + + +check_zol_version() { + # + # Check ZFSonLinux.org + # + msg "Checking zfsonlinux.org for new versions..." + check_webpage "http://zfsonlinux.org/" "(?<=downloads/zfsonlinux/spl/spl-)[\d\.]+(?=.tar.gz)" "${AZB_ZOL_VERSION}" + check_result "ZOL stable version" "ZOL stable version" +} + + +check_archiso +check_linux_kernel +check_linux_lts_kernel +check_zol_version -# -# Check ZFSonLinux.org -# -msg "Checking zfsonlinux.org for new versions..." -check_webpage "http://zfsonlinux.org/" "(?<=downloads/zfsonlinux/spl/spl-)[\d\.]+(?=.tar.gz)" "$AZB_ZOL_VERSION" -check_result "ZOL stable version" "ZOL stable version" # # This is the end @@ -152,6 +186,6 @@ check_result "ZOL stable version" "ZOL stable version" # This is the end # My only friend, the end # -if [[ $HAS_ERROR -eq 1 ]]; then +if [[ ${HAS_ERROR} -eq 1 ]]; then exit 1; fi