#!/bin/bash
# Copyright (C) 2004,2005,2006 Adalbert Prokop, adalbert.prokop(%)web.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# Last change: 2006-06-02
#
# Downloads a numbered series of URLs with wget. Every "@@" in the URL (and
# in the referer) is replaced by the current number, which runs from <start>
# to <end> in increments of <step>.
#
# Two modes of operation:
#   * REUSE_CONNECTION=1: the complete URL list is generated up front and
#     piped into a single wget process (--input-file -).
#   * REUSE_CONNECTION=0: wget is started once per URL. This is required
#     when the referer changes per URL or when there is no end number, since
#     the exit status of each download is then needed to count dead URLs.
#
# "set -e" is deliberately not used: failing wget calls are an expected part
# of normal operation (see -t).

declare -i START=1
declare -i END=0
declare -i STEP=1
declare -i TRIES=0
declare -i NUM=0
declare -i LENGTH=0
declare -i LEADING_ZEROS=0
declare -i RETVAL=0
declare -i ERROR=0
declare -i ERROR_COUNT=0
declare -i REUSE_CONNECTION=1
declare -i CONTINUE=0
declare -i HAVE_USERNAME=0
declare -i HAVE_PASSWORD=0

DEFAULT_AGENT="Opera/7.23 (X11; Linux i686; U) [en]"
WGET=/usr/bin/wget

# Initialise explicitly so that same-named variables exported in the caller's
# environment (USERNAME is exported on some systems) cannot leak into the
# wget command line.
URL=""
REFERER=""
USERNAME=""
PASSWORD=""

# Options shared by every wget invocation; filled in by main() once the
# command line has been parsed.
declare -a WGET_OPTS=()

#######################################
# Print the usage text.
# Globals:   DEFAULT_AGENT, STEP, START (read)
# Outputs:   usage text on stdout
#######################################
print_help() {
  cat << EOF
Usage: $(basename -- "$0") [-a <agent>] [-r <ref_url>] [-w] [-s <step>] [-t <tries>] <url> <start> [<end>]
  -a agent    User agent, aka browser, i.e. Opera, Mozilla, etc.
              Default is $DEFAULT_AGENT
  -c          Continue with partialy downloaded files.
  -h          Show this help.
  -l length   Number length. Implicitely sets -w
  -p pass     Password, if site requires one. Only valid if username is given.
  -r ref_url  REFERER URL, may contain @@ .
  -s step     Increment value. Default is $STEP
  -t tries    Number of dead URLs before giving up. Default is infinite.
              Has to be specified, if no end number is given.
  -u user     Username, if site needs one.
  -w          Insert leading zeros.
  url         URL to fetch. Any occurence of @@ will be substituted by
              current number.
  start       Start number, default is $START.
  end         End number. Default is infinite.
EOF
}

#######################################
# Print a diagnostic and remember that an error occurred, so that all
# command line problems can be reported before the script stops.
# Globals:   ERROR (written)
# Arguments: $* - message text
# Outputs:   message on stderr
#######################################
report_error() {
  printf '%s\n' "$*" >&2
  ERROR=1
}

#######################################
# Check whether the argument is a plain decimal integer with an optional
# sign. The value is matched textually and never evaluated, so arithmetic
# expressions are rejected rather than executed.
# Arguments: $1 - string to test
# Returns:   0 if $1 is an integer, 1 otherwise
#######################################
is_number() {
  [[ "$1" =~ ^[-+]?[0-9]+$ ]]
}

#######################################
# Convert a string accepted by is_number into canonical base-10 form.
# Leading zeros are stripped so that "010" means ten rather than octal 8,
# and "08" does not raise an arithmetic error.
# Arguments: $1 - integer string (must satisfy is_number)
# Outputs:   the normalised integer on stdout
#######################################
to_decimal() {
  local digits="$1"
  local sign=""

  case "$digits" in
    -*)
      sign="-"
      digits="${digits#-}"
      ;;
    +*)
      digits="${digits#+}"
      ;;
  esac
  # 10# forces base 10 regardless of leading zeros.
  printf '%s%d\n' "$sign" "$((10#${digits}))"
}

#######################################
# Walk NUM from its current value up to END (or forever if END is 0).
# With REUSE_CONNECTION=1 each expanded URL is printed on stdout; otherwise
# wget is run once per URL and consecutive failures are counted.
# Globals:   URL, REFERER, END, STEP, LENGTH, LEADING_ZEROS, TRIES,
#            REUSE_CONNECTION, WGET, WGET_OPTS (read)
#            NUM, RETVAL, ERROR_COUNT (written)
# Outputs:   one URL per line on stdout (REUSE_CONNECTION=1 only)
# Returns:   exits the (sub)shell with status 0 once TRIES consecutive
#            downloads have failed
#######################################
produce_file_list() {
  local this_num
  local this_url
  local -a referer_opt

  while (( END == 0 || NUM <= END )); do
    if (( LEADING_ZEROS == 1 )); then
      # A precision on %d pads the number with leading zeros.
      printf -v this_num "%.${LENGTH}d" "$NUM"
    else
      this_num="$NUM"
    fi

    this_url="${URL//@@/${this_num}}"

    if (( REUSE_CONNECTION == 0 )); then
      referer_opt=()
      if [[ -n "$REFERER" ]]; then
        # The referer may carry the running number as well; substitute it
        # for every single request.
        referer_opt=("--referer=${REFERER//@@/${this_num}}")
      fi
      "$WGET" "${WGET_OPTS[@]}" "${referer_opt[@]}" "$this_url"
      RETVAL=$?
    else
      printf '%s\n' "$this_url"
    fi

    if (( TRIES != 0 )); then
      if (( RETVAL == 0 )); then
        ERROR_COUNT=0
      else
        ERROR_COUNT=$((ERROR_COUNT + 1))
      fi
      if (( ERROR_COUNT >= TRIES )); then
        exit 0
      fi
    fi

    NUM=$((NUM + STEP))
  done
}

#######################################
# Parse the command line, validate it and start the download.
# Arguments: "$@" - the script's command line
# Returns:   exits 1 on usage errors; otherwise the status of the last
#            command run (wget in single-process mode)
#######################################
main() {
  local opt_name
  local -a referer_opt=()

  if [[ ! -x "$WGET" ]]; then
    WGET="$(command -v wget 2>/dev/null)"
    if [[ -z "$WGET" ]]; then
      report_error "Could not find wget tool."
    fi
  fi

  RETVAL=0

  while getopts :whca:r:s:t:u:p:l: opt_name; do
    case "$opt_name" in
      c)
        CONTINUE=1
        ;;
      a)
        DEFAULT_AGENT="$OPTARG"
        ;;
      r)
        REFERER="$OPTARG"
        ;;
      s)
        if is_number "$OPTARG"; then
          STEP="$(to_decimal "$OPTARG")"
        else
          report_error "Value $OPTARG for option -s is not a number"
        fi
        ;;
      t)
        if is_number "$OPTARG"; then
          TRIES="$(to_decimal "$OPTARG")"
        else
          report_error "Value $OPTARG for option -t is not a number"
        fi
        ;;
      w)
        LEADING_ZEROS=1
        ;;
      h)
        print_help
        exit 0
        ;;
      u)
        USERNAME="$OPTARG"
        HAVE_USERNAME=1
        ;;
      p)
        PASSWORD="$OPTARG"
        HAVE_PASSWORD=1
        ;;
      l)
        if is_number "$OPTARG"; then
          LENGTH="$(to_decimal "$OPTARG")"
          LEADING_ZEROS=1
        else
          report_error "Value $OPTARG for option -l is not a number"
        fi
        ;;
      \?)
        report_error "Unknown option $OPTARG"
        ;;
      :)
        report_error "Option -$OPTARG requires a parameter"
        ;;
      *)
        report_error "Option $opt_name not recognized ($OPTARG)"
        ;;
    esac
  done

  # get rid of already checked parameters
  shift $((OPTIND - 1))

  if (( $# < 2 )); then
    report_error "You have at least to specify the URL and a start value"
  elif ! is_number "$2"; then
    report_error "Value $2 is not a number - no valid start value."
  else
    START="$(to_decimal "$2")"
  fi

  if [[ -n "${3:-}" ]]; then
    if ! is_number "$3"; then
      report_error "Value $3 is not a number - no valid end value."
    else
      END="$(to_decimal "$3")"
    fi
  fi

  # Now check some semantics
  if (( END == 0 && TRIES == 0 )); then
    # Without an end number the only stop condition is the dead-URL counter.
    report_error "You have to specify the number of tries, if no end number is given"
  fi

  if (( END > 0 && START > END )); then
    report_error "Start value $START is greater than end value $END. This could cause loops."
  fi

  if (( HAVE_PASSWORD == 1 && HAVE_USERNAME == 0 )); then
    report_error "A username must be specified when using a password"
  fi

  if (( ERROR == 1 )); then
    printf '%s\n' "Stopping because of previous errors" >&2
    exit 1
  fi

  #####################################
  # END CHECKING. PROGRAM STARTS HERE #
  #####################################

  URL="$1"
  NUM=$START

  if (( LENGTH == 0 )); then
    # Default padding width: number of digits of the end value.
    LENGTH=${#END}
  fi

  # A referer that changes per URL cannot be given to one shared wget run.
  if [[ "$REFERER" == *@@* ]]; then
    REUSE_CONNECTION=0
  fi

  # An open-ended run needs the exit status of every single download.
  if (( END == 0 )); then
    REUSE_CONNECTION=0
  fi

  # Keep a record of the invocation next to the download directory.
  # NOTE(review): the options were shifted away above, so only
  # <url> <start> [<end>] end up in the log - confirm this is intended.
  printf '%s %s\n' "$(basename -- "$0")" "$*" >> "../.$(basename -- "$PWD").url"

  # Assemble the options every wget call has in common. An array keeps each
  # value a single argument without eval, so characters such as "&", "$" or
  # spaces in user supplied values reach wget unchanged.
  WGET_OPTS=()
  if (( CONTINUE == 1 )); then
    WGET_OPTS+=(-c)
  fi
  if (( HAVE_USERNAME == 1 )); then
    WGET_OPTS+=("--http-user=${USERNAME}")
  fi
  if (( HAVE_PASSWORD == 1 )); then
    # NOTE: the password is visible in the process list while wget runs.
    WGET_OPTS+=("--http-passwd=${PASSWORD}")
  fi
  WGET_OPTS+=(-U "$DEFAULT_AGENT")

  if (( REUSE_CONNECTION == 1 )); then
    if [[ -n "$REFERER" ]]; then
      referer_opt=("--referer=${REFERER}")
    fi
    produce_file_list \
      | tac \
      | "$WGET" "${WGET_OPTS[@]}" "${referer_opt[@]}" --input-file -
  else
    produce_file_list
  fi
}

main "$@"