#!/bin/bash # # DO NOT EDIT! This file is generated from geo-soon.sh # # # HACK ALERT! This was quickly hacked from an existing script, # and needs to be merged back into the main codebase. # # # Donated to the public domain by Rick Richardson # # Use at your own risk. Not suitable for any purpose. Not legal tender. # # $Id: geo-soon.sh,v 1.32 2007/03/12 18:10:50 rick Exp $ # PROGNAME="$0" usage() { cat <&2 exit 1 } debug() { if [ $DEBUG -ge $1 ]; then echo "`basename $PROGNAME`: $2" >&2 fi } verbose() { if [ $VERBOSE -ge $1 ]; then echo "$2" >&2 fi } dbgcmd() { if [ $DEBUG -ge $DBGCMD_LVL ]; then echo "$@" >&2 fi "$@" } DBGCMD_LVL=2 # # procedure to remove cruft files # remove_cruft() { if [ $DEBUG = 0 -a "$CRUFT" != "" ]; then for i in $CRUFT do [ -f $i ] && rm -f $i done fi } trap remove_cruft EXIT # # Convert DegDec, MinDec, or DMS lat/lon to DegDec # latlon() { # Handle NSEW prefixes arg1=`echo "$1" | sed -e 's/^[nNeE]//' -e 's/^[wW]/-/' -e 's/^[sS]/-/'` # If negative, print the sign then take the absolute value case "$arg1" in -*) echo -n "-"; arg1=`echo "$arg1" | sed 's/^-//'`;; esac # Now handle the 3 different formats case $# in 1) case "$arg1" in *.*.*) echo "$arg1" \ | sed -e 's/,//' -e 's#\([^.]*\)\.#\1 #' -e 's#$# 6k 60/+p#' \ | dc ;; lat=*) echo "$arg1" | sed 's/^lat=//' ;; lon=*) echo "$arg1" | sed 's/^lon=//' ;; *) echo $arg1 ;; esac ;; 2) echo "6k $arg1 $2 60/+p" | dc;; 3) echo "6k $arg1 $2 60/ $3 3600/++p" | dc;; esac } # # Convert DegDec to MinDec # degdec2mindec() { awk -v v=$1 \ 'BEGIN{ i=int(v); f=(v-i)*60; if(f<0)f=-f; printf "%d.%06.3f\n", i, f}' } # # Read RC file, if there is one # read_rc_file() { if [ -f $HOME/.georc ]; then . $HOME/.georc # Allow LAT/LON in rc file to be in any of the formats that we grok if [ "" != "$LAT" ]; then LAT=`latlon $LAT` fi if [ "" != "$LON" ]; then LON=`latlon $LON` fi else cat <<-EOF > $HOME/.georc # # These are the default values for the geo-* series of programs # Please edit this file as needed. 
Setting values for # USERNAME, PASSWORD, LAT/LON, and STATE are required. # ################################# # Login and paid membership status for www.geocaching.com... #USERNAME=name #PASSWORD=pasword #MOC=0 ################################# # Your HOME lat/lon and state... #LAT=N44.55.666 #LON=W93.11.222 #STATE=MN ################################# # Default map scale, font, and source... #MAPSCALE=10K #MAPFONT=helvetica #MAPSRC=2 ################################# # Login for terraserver.com... #TSCOM_EMAIL=xxx@yyy.com #TSCOM_PW=password ################################# # Miscellaneous... #OUTFMT=gpsdrive EOF error "First time user: please review and edit $HOME/.georc" fi } if [ `uname` = 'Darwin' ]; then sed=gnused date=gdate touch=gtouch PATH=$PATH:/usr/local/bin:/opt/local/bin export PATH else sed=sed date=date touch=touch fi # # Get the value from a name= value= pair in a file # get_value() { # = 97 && val <= 122) #0x61-0x7A encoded = encoded c else if (val >= 65 && val <= 90) #0x41-0x5A encoded = encoded c else if (val >= 48 && val <= 57) #0x30-0x39 encoded = encoded c else if (val >= 45 && val <= 46) #0x2D-0x2E encoded = encoded c else if (c == " ") encoded = encoded "+" else if (val < 128) { lo = val % 16 hi = int(val / 16); encoded = encoded "%" hextab[hi] hextab[lo] } else { byte = 192 + val/64 lo = byte % 16 hi = int(byte / 16); encoded = encoded "%" hextab[hi] hextab[lo] byte = 128 + val%64 lo = byte % 16 hi = int(byte / 16); encoded = encoded "%" hextab[hi] hextab[lo] } } print encoded } ' } # # return true if current arguments appear to be a lat/lon # is_latlon() { if [ "$#" -lt 2 ]; then return 1 fi case "$1" in lat=*) ;; # cut/paste from GPX file [NS]) return 0;; # cut/paste from gc.com [NSns][0-9]*) ;; [-][0-9]*) ;; [0-9]*) ;; *) return 1;; esac case "$2" in lon=*) return 0;; [EWew][0-9]*) return 0;; [-][0-9]*) return 0;; [0-9]*) return 0;; *) return 1;; esac } # # split lines between two strings # # $1 - string 1 # $2 - string 2 # $3 - null or 
'g' # split_lines_between() { sed "s@$1$2@$1\\ $2@$3" } ############################################################################## # end #include "geo-common" ############################################################################## ############################################################################## # begin #include "geo-common-gc" ############################################################################## # # Common global constants # UA="Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)" GEO="http://www.geocaching.com" # # Global variables that can be overridden on command line or rc file # PASSWORD=dummy USERNAME=dummy LAT=44.9472 LON=-93.4914 MOC=${MOC:0} # # Global variables # COOKIE_FILE=$HOME/.geocookies NOCOOKIES=0 FOUND=1 USERFOUND=1 BABELFLAGS= RADIUS= OUTFILE= OUTFMT=gpsdrive NUM=20 INCLUDE=* EXCLUDE='-unavail' VARTIME=found GEOSLEEP=${GEOSLEEP:-5} # # Common options handling # gc_usage() { cat < $LOGINPAGE if grep -q "ErrorText" $LOGINPAGE; then error "Login username/password does not match." fi } # # procedure to nag about agreeing to EasyGps download license # easy_warning() { cat <<-EOF You have not agreed to the waypoint download license at $GEO Click one of the waypoint license agreement links at $GEO, read and agree to the license terms, then try this program again. 
EOF } # # getcids infile cidfile xtrafile archfile number # # Wade thru the HTML and produce lists of found, notfound and new CIDs # getcids() { awk \ -v "CIDFILE=$2" \ -v "XTRAFILE=$3" \ -v "ARCHFILE=$4" \ -v "NUM=$5" \ -v "USERFOUND=$USERFOUND" \ -v "VARTIME=$VARTIME" \ -v "MOC=$MOC" \ -v "DATE=$date" \ < $1 \ ' function hex2dec(x, val) { for(val = 0; length(x); x = substr(x, 2)) val = 16*val + index("0123456789ABCDEF", substr(x, 1, 1)) - 1 return val } # Convert GC0000 to 58913 function wp2id(wp, val) { sub("^GC", "", wp) if (wp < "G000") return hex2dec(wp) set = "0123456789ABCDEFGHJKMNPQRTVWXYZ" val = 0 for (pos = 1; pos <= 4; ++pos) { val *= 31; val += index(set, substr(wp, pos, 1)) - 1; } return val - 411120 } function id2wp(id, val) { gid = ""; if (id < 0) ; else if (id < 65536) gid = sprintf("GC%04X", id) else { GcOffset = 16 * 31 * 31 * 31 - 65536 GcSet = "0123456789ABCDEFGHJKMNPQRTVWXYZ" id += GcOffset; for (i = 1; i <= 4; ++i) { gid = substr(GcSet, id%31 + 1, 1) gid id = int(id / 31) } tmp = substr(GcSet, id%31 + 1, 1) if (tmp != 0) { gid = tmp gid id = int(id / 31) } if (id) gid = "" else gid = "GC" gid } # print "id = ", id, "wp = ", gid > "/dev/stderr" return gid } BEGIN { q = sprintf("%c", 39) ++NUM } /&2" } /]*>", "", name) sub("*.", "", name) sub("]*>", "", name) sub("]*>", "", name) sub("<[^>]*>", "", name) sub("<[^>]*>", "", name) } /"Found It!"/ { if (USERFOUND) ifound = 1 next } /Member-only cache/ { moc = 1 } / ago
"); i += 6 j = match($0, ""); i += 6 j = match($0, "." gdate "<" cmd "<" foundt >> "/tmp/aaa" } tdcnt == 7 && /[ ][0-9][^<]*[0-9]
[0-9].*[0-9][0-9]
"); i += 1 j = match($0, ".*", "") gsub("<[^>]*>.*", "") if ($0 != "") { cmd = sprintf("%s -d \"12am %s\" +%%s", DATE, $0) cmd | getline ifoundt; close(cmd) } # print ifoundt ">" $0 "<" >> "/tmp/aaa" } / avail = 0; archived = 1 } /[^<]/ { avail = 0; } /\(GC.....\)
/ { i = match($0, ".GC.....") gcid = substr($0, i+1, 7) cid = wp2id(gcid) } /\(GC....\)
/ { i = match($0, ".GC....") gcid = substr($0, i+1, 6) cid = wp2id(gcid) } /\(GC...\)
/ { i = match($0, ".GC...") gcid = substr($0, i+1, 5) cid = wp2id(gcid) } /\(GC..\)
/ { i = match($0, ".GC..") gcid = substr($0, i+1, 4) cid = wp2id(gcid) } /left" nowrap>[0-9].*[0-9][0-9]<.td>/ { i = match($0, ">") j = match($0, "<.td>") date = substr($0, i+1, j-i-1) sub(q, "20", date) cmd = sprintf("%s -d \"%s\" +%%s", DATE, date) cmd | getline placedt; close(cmd) } /left" nowrap>[0-9].*[0-9][0-9].") j = match($0, " > "/tmp/aaa" } /alt.*my cache/ { # altmy cache # A mistake in the HTML! should be alt="my cache" if (USERFOUND) { iplaced = 1 ifound = 1 } } /alt="Event Cache"/ { type = "event" gctype = "Event cache" } /alt="Cache In Trash Out Event"/ { type = "cito" gctype = "Cache In Trash Out Event" } /alt="Letterbox Hybrid"/ { type = "hybrid" gctype = "Letterbox Hybrid" } /alt="Multi-cache"/ { type = "multi" gctype = "Multi-Cache" } /alt="Unknown Cache"/ { type = "unknown" gctype = "Unknown Cache" } /alt="Virtual Cache"/ { type = "virtual" gctype = "Virtual cache" } /alt="Webcam Cache"/ { type = "webcam" gctype = "Webcam Cache" } /name=.CID. / { i = match($0, "value=.") cid = substr($0, i+7, 99) + 0 } /name=.BID. / { i = match($0, "value=.") bid = substr($0, i+7, 99) + 0 } /\)
/ { text = $0 sub(/.*\(/, "", text) sub(/\).*/, "", text) n = split(text, fld, "/") if (n == 2) { difficulty = fld[1] terrain = fld[2] } } /alt=.Size: / { container = $0 sub(".*Size: ", "", container) sub(". border.*", "", container) sub(". title.*", "", container) } /Unapproved cache/ && inrecord { avail = 0; archived = 1 } /<\/[tT][rR]>/ && inrecord { inrecord = 0 strtype = "Geocache" if (moc) strtype = strtype "-moc" if (unfound) strtype = strtype "-unfound" else if (ifound) strtype = strtype "-ifound" if (!avail) strtype = strtype "-unavail" if (archived) strtype = strtype "-archived" strtype = strtype "-" type # gpsbabel only allows one time in the DB, figure out what # time to use for this, but always carry all three times # in the .xtra file if (iplaced) ifoundt = placedt vartime=1234 if (VARTIME == "placed") vartime = placedt else if (VARTIME == "ifound") vartime = ifoundt else if (foundt > 0) vartime = foundt else vartime = placedt # avail=1 is the choice right now (8/19/05) # archived=0 is the choice right now (11/14/06) if (!archived && (MOC || !moc)) { if (bid != 0) { printf("-dBID=%d\n", bid) > CIDFILE gcid = id2wp(bid) } else if (cid != 0) { printf("-dCID=%d\n", cid) > CIDFILE } # GCID type vartime ifound moc iplaced tPLACED tFOUND tIFOUND printf "%s\t%s\t%d\t%s\t%s\t%s\t%d\t%d\t%d\t%.1f\t%.1f\t%s\t%s\n", gcid, strtype, vartime, ifound, moc, iplaced, placedt, foundt, ifoundt, difficulty, terrain, container, gctype >> XTRAFILE if (--NUM == 0) exit } else if (archived) { printf "%s\t%s\t0.0\t0.0\t%s%s%s\t%s\t%d\t" \ "%s\t%s\t%s\t%d\t%d\t%d\t%.1f\t%.1f\t%s\t%s\n", gcid, name, "http://www.geocaching.com/seek/cache_details.aspx", "?pf=y&log=y&wp=", gcid, strtype, vartime, ifound, moc, iplaced, placedt, foundt, ifoundt, difficulty, terrain, container, gctype >> ARCHFILE } } ' } # # A temporary style we can use for merging the loc data with # the scraped html data. This is a dual purpose hack. 
We # use it as an output format to convert the .loc data to a # record-per-line format. We use it as an input format to # read up the merged data. # make_scrape_style() { cat <<-EOF FIELD_DELIMITER TAB RECORD_DELIMITER NEWLINE BADCHARS TAB IFIELD SHORTNAME, "", "%s" IFIELD DESCRIPTION, "", "%s" IFIELD LAT_DECIMAL, "", "%08.5f" IFIELD LON_DECIMAL, "", "%08.5f" IFIELD URL, "", "%s" IFIELD ICON_DESCR, "", "%s" #strtype (Geocache-*) IFIELD TIMET_TIME, "", "%ld" #variable time IFIELD IGNORE, "", "%s" #ifound IFIELD IGNORE, "", "%s" #moc IFIELD IGNORE, "", "%s" #iplaced IFIELD IGNORE, "", "%s" #placed time IFIELD IGNORE, "", "%s" #found time IFIELD IGNORE, "", "%s" #ifound time IFIELD GEOCACHE_DIFF, "", "%3.1f" #difficulty IFIELD GEOCACHE_TERR, "", "%3.1f" #terrain IFIELD GEOCACHE_CONTAINER,"", "%s" #container (not set) IFIELD GEOCACHE_TYPE, "", "%s" #gc.com type OFIELD SHORTNAME, "", "%s" OFIELD DESCRIPTION, "", "%s" OFIELD LAT_DECIMAL, "", "%08.5f" OFIELD LON_DECIMAL, "", "%08.5f" OFIELD URL, "", "%s" EOF # OFIELD ICON_DESCR, "", "%s" } # # Query the gc website # gc_query() { if [ $USERFOUND = 0 ]; then FOUND=1 fi if [ $FOUND = 0 ]; then SEARCH="$SEARCH&f=1" fi if [ $DEBUG -gt 0 ]; then TMP=/tmp/geo else TMP=/tmp/geo$$ fi HTMLPAGE=$TMP.page CIDFILE=$TMP.cids LOCFILE=$TMP.loc XTRAFILE=$TMP.xtra CSVFILE=$TMP.csv MERGEFILE=$TMP.merge ARCHFILE=$TMP.arch OUTWAY=$TMP.way STYLE=$TMP.newstyle CRUFT="$CRUFT $HTMLPAGE" CRUFT="$CRUFT $CIDFILE" CRUFT="$CRUFT $LOCFILE" CRUFT="$CRUFT $XTRAFILE" CRUFT="$CRUFT $CSVFILE" CRUFT="$CRUFT $MERGEFILE" CRUFT="$CRUFT $ARCHFILE" CRUFT="$CRUFT $OUTWAY" CRUFT="$CRUFT $STYLE" if [ $NOCOOKIES = 1 ]; then CRUFT="$CRUFT $COOKIE_FILE" fi # # Login to gc.com # gc_login "$USERNAME" "$PASSWORD" # # Find the bookmark # if [ "$BOOKMARK" != "" ]; then URL="$GEO/bookmarks" debug 1 "$start: curl $URL #bookmark" SEARCH=` curl -L -s -b $COOKIE_FILE -A "$UA" "$URL" \ | grep -y ">$BOOKMARK<" \ | sed -e 's@^.*href=.http://www.geocaching.com/@@' -e 's/.>.*$//' ` 
case "$SEARCH" in *bookmarks*) ;; *) error "No bookmark with the name '$BOOKMARK'.";; esac fi # # We might combine one or more pages into a single XML, so cobble # up a header with the ?xml and loc tags. # cat <<-EOF > $LOCFILE EOF # # Loop, getting at least "NUM" locations # if [ $DEBUG -gt 0 ]; then filter2="tee $TMP.bulk" else filter2=cat fi > $XTRAFILE > $ARCHFILE SLEEP=1 ((start=0)) while ((start < NUM)); do sleep $SLEEP # # Fetch the page of closest caches and scrape the cache ID's # case "$SEARCH" in *bookmark*) URL="$GEO/$SEARCH" ;; *) URL="$GEO/seek/nearest.aspx" URL="$URL$SEARCH" ;; esac debug 1 "$start: curl $URL #list" if ((start > 0)); then # "postback"... grab the "next" button case "$SEARCH" in *bookmark*) __EVENTTARGET="ListInfo\$pgrBMItems\$_ctl8" ;; *) TGT=$(sed -n "s/^.*__doPostBack('.*pgrTop\$\(.*\)','.*/\1/p" \ < $HTMLPAGE) __EVENTTARGET="ctl00%24ContentBody%24pgrTop%24$TGT" ;; esac curl -L -s -b $COOKIE_FILE -A "$UA" \ -d __EVENTTARGET="$__EVENTTARGET" \ -d __EVENTARGUMENT="$__EVENTARGUMENT" \ -d __VIEWSTATE="$__VIEWSTATE" \ -d __EVENTVALIDATION="$__EVENTVALIDATION" \ "$URL" > $HTMLPAGE else curl -L -s -b $COOKIE_FILE -A "$UA" \ "$URL" > $HTMLPAGE if [ "$DEBUG" -ge 1 ]; then grep "Total Records:.*Top.*" $HTMLPAGE | sed -e "s/<.b>.*//" -e "s/^.*span>//" -e "s///" 1>&2 fi fi rc=$?; if [ $rc != 0 ]; then error "curl: fetch $URL" fi if grep -s -q "We encountered an error when requesting that page!" 
\ $HTMLPAGE; then error "searching error (1) on $start" fi if grep -s -q "has resulted in an error" \ $HTMLPAGE; then error "searching error (2) on $start" fi if grep -s -q "By State" $HTMLPAGE; then error "searching gave up on $start" fi # # Grab a few important values from the page # get_value __EVENTTARGET $HTMLPAGE get_value __EVENTARGUMENT $HTMLPAGE get_value __VIEWSTATE $HTMLPAGE __VIEWSTATE=`urlencode "$__VIEWSTATE"` get_value __EVENTVALIDATION $HTMLPAGE __EVENTVALIDATION=`urlencode "$__EVENTVALIDATION"` # # Grab the CIDs into two categories: found and notfound # > $CIDFILE getcids $HTMLPAGE $CIDFILE $XTRAFILE $ARCHFILE $((NUM-start)) # # Fetch the waypoints, rip out the ?xml and loc tags, and # append to the $LOCFILE file. # if [ -s "$CIDFILE" ]; then sleep $SLEEP case "$SEARCH" in *bookmark*) URL="$GEO/$SEARCH" ;; *) URL="$GEO/seek/nearest.aspx" URL="$URL$SEARCH" ;; esac debug 2 "$start: curl $URL #loc" curl -s -b $COOKIE_FILE -A "$UA" \ -d __EVENTTARGET="$__EVENTTARGET" \ -d __EVENTARGUMENT="$__EVENTARGUMENT" \ -d __VIEWSTATE="$__VIEWSTATE" \ -d __EVENTVALIDATION="$__EVENTVALIDATION" \ `cat $CIDFILE` \ -d "Download=Download+Waypoints" \ -d "ListInfo:btnDownload=Download+to+.Loc" \ "$URL" \ | $filter2 \ | sed -e 's/^]*>//' \ -e 's/>[gG]eocacheGeocache]*>//' \ -e 's###' \ | tr '\303' 'A' \ >> $LOCFILE rc=$?; if [ $rc != 0 ]; then error "curl: fetch the waypoints" fi if grep -s -q "you are not logged in" $LOCFILE; then error "you are not logged in on $start" fi fi # # Check to see if the user hasn't agreed to license terms # if grep -s -q "lblAgreementText" $LOCFILE; then easy_warning >&2 remove_cruft exit fi ((start=start+20)) # If the Next button is disabled, break this loop # grep "Records: [1-9].*disabled.>Next<" $HTMLPAGE if grep -s -q "Records: [1-9].*disabled.>Next<" $HTMLPAGE; then # echo "$start: dis" break; fi done # # Finish off the .loc file # echo "" >> $LOCFILE # # Convert the .loc data to .csv format and join it with # the extra data scraped 
from the HTML page. Filter out # the data according to the -I and -X options. # # http://www.geocaching.com/seek/cache_details.aspx?wp=GCG2H4 # http://www.geocaching.com/seek/cache_details.aspx?pf=y&log=y&wp=GCG2H4 # http://www.geocaching.com/seek/cache_details.aspx?ID=92117&log=y&pf=y # # The joined .csv format looks like this: # GCH636 Jidana 3 by rickrich 44.94520 -93.47540 \ # http://www.geocaching.com/seek/cache_details.aspx?pf=y&log=y&wp=GCH636 \ # Geocache-ifound-regular 1070285077 1 0 1 \ # 1067925600 1070285077 0 # make_scrape_style > $STYLE dbgcmd gpsbabel -i geo$GEONUKE -f $LOCFILE \ -o xcsv,style=$STYLE -F $CSVFILE if [ $? != 0 ]; then error "gpsbabel returned error code [1]" fi join -t ' ' $CSVFILE $XTRAFILE \ | egrep -- "$INCLUDE" | egrep -v -- "$EXCLUDE" \ | sed -e 's/wp=/pf=y\&log=y\&&/' > $MERGEFILE if [ $FOUND = 1 ]; then cat $ARCHFILE \ | egrep -- "$INCLUDE" | egrep -v -- "$EXCLUDE" \ | sed -e 's/wp=/pf=y\&log=y\&&/' >> $MERGEFILE fi if [ $DEBUG -ge 2 ]; then # First two of these should be the same number of lines! wc -l $CSVFILE >&2 wc -l $XTRAFILE >&2 wc -l $MERGEFILE >&2 fi # # Convert to the desired format # BABELFILT= if [ "$RADIUS" != "" ]; then BABELFILT="-x radius,distance=$RADIUS,lat=$LAT,lon=$LON" fi if [ $SQL = 1 ]; then # # add it via mysql # if [ "$OUTFILE" != "" ]; then >"$OUTFILE" fi if [ $PURGE = 1 ]; then gpsdrive_purge | gpsdrive_mysql PURGE=2 fi dbgcmd gpsbabel $BABELFLAGS \ -i xcsv,style=$STYLE -f $MERGEFILE \ $BABELFILT -o "$OUTFMT" -F $OUTWAY if [ $? != 0 ]; then error "gpsbabel returned error code [2]" fi gpsdrive_add <$OUTWAY $SQLTAG | gpsdrive_mysql elif [ $MAP = 1 ]; then dbgcmd gpsbabel $BABELFLAGS \ -i xcsv,style=$STYLE -f $MERGEFILE \ $BABELFILT -o "$OUTFMT" -F $OUTWAY if [ $? 
!= 0 ]; then error "gpsbabel returned error code [3]" fi if [ "$OUTFILE" = "" ]; then dbgcmd geo-map -s0 $MAPOPTS -t$OUTWAY else dbgcmd geo-map -s0 $MAPOPTS -t$OUTWAY -o"$OUTFILE" fi else # # output to stdout or to a file # if [ "$OUTFILE" = "" ]; then OUTTMP="$TMP.way"; CRUFT="$CRUFT $OUTTMP" dbgcmd gpsbabel $BABELFLAGS \ -i xcsv,style=$STYLE -f $MERGEFILE \ $BABELFILT -o "$OUTFMT" -F $OUTTMP if [ $? != 0 ]; then error "gpsbabel returned error code [4]" fi cat $OUTTMP else dbgcmd gpsbabel $BABELFLAGS \ -i xcsv,style=$STYLE -f $MERGEFILE \ $BABELFILT -o "$OUTFMT" -F $OUTFILE if [ $? != 0 ]; then error "gpsbabel returned error code [5]" fi fi fi # # Optionally, print the HTML pages # if [ "$CMDPIPE" != "" ]; then OIFS="$IFS" IFS=" " while read id desc lat lon url cat vartime ifound moc iplaced \ placedt foundt ifoundt extra; do url="$url&decrypt=y" echo "Print: $url" HTMLPAGE2=$TMP.html CRUFT="$CRUFT $HTMLPAGE2" debug 1 "curl $url" >&2 dbgcmd curl -s -A "$UA" -b $COOKIE_FILE "$url" > $HTMLPAGE2 htmldoc --quiet -t ps --nup 2 --fontsize 14 --webpage $HTMLPAGE2 \ | psselect -q -p1-1 | eval $CMDPIPE # exit done < $MERGEFILE IFS="$OIFS" fi # # Optionally, fetch printable HTML pages # if [ "$HTMLDIR" != "" -o "$LOGDIR" != "" ]; then if [ "$HTMLDIR" != "" -a ! -d "$HTMLDIR" ]; then mkdir "$HTMLDIR" || error "Couldn't mkdir $HTMLDIR" fi if [ "$LOGDIR" != "" -a ! 
-d "$LOGDIR" ]; then mkdir "$LOGDIR" || error "Couldn't mkdir $LOGDIR" fi HTMLPAGE2=$TMP.html CRUFT="$CRUFT $HTMLPAGE2" TIMESTAMP=$TMP.time CRUFT="$CRUFT $TIMESTAMP" fetchcnt=0 fetchmax=1000 if [ $DEBUG -ge 3 ]; then fetchmax=3 fi OIFS="$IFS" IFS=" " while read id desc lat lon url cat vartime ifound moc iplaced \ placedt foundt ifoundt extra; do # # Don't fetch page if we already have a current version # if [ $placedt -gt $foundt ]; then filetime="$placedt" else filetime="$foundt" fi $touch -d "1/1/70 $filetime seconds last second" $TIMESTAMP if [ "$filetime" -gt 0 -a "$HTMLDIR/$id.html" -nt $TIMESTAMP ]; then continue fi # Limit to 1000 caches/day ((fetchcnt=fetchcnt+1)) if [ $fetchcnt -gt $fetchmax ]; then error "Fetch count exceeded $fetchmax. Try tomorrow!" fi # Be kind to the server. Do not remove this sleep sleep $GEOSLEEP echo "" > $HTMLPAGE debug 1 "curl $url" >&2 dbgcmd curl -s -A "$UA" -b $COOKIE_FILE "$url" | tr -d "\001\007\010\013\017\020\031" >> $HTMLPAGE if [ $? != 0 ]; then error "Couldn't fetch $id cache page" fi size=$(ls -l $HTMLPAGE | awk '{print $5}') if [ $size -lt 1000 ]; then debug 0 "Could not retrieve web page for cache $id" continue fi if [ "$HTMLDIR" != "" ]; then cp $HTMLPAGE "$HTMLDIR/$id.html" || error "Couldn't copy $id cache page" if [ "$filetime" -gt 0 ]; then $touch -d "1/1/70 $filetime seconds" "$HTMLDIR/$id.html" fi fi if [ "$LOGDIR" != "" ]; then sed -e '1,/Cache find counts/d' -e 's/$LOGUSERNAME<|strong> \($LOGUSERNAME\) \(" > $HTMLPAGE2 lynx -dump $HTMLPAGE2 > $LOGDIR/$id.log if [ "$filetime" -gt 0 ]; then $touch -d "1/1/70 $filetime seconds" "$LOGDIR/$id.log" fi fi done < $MERGEFILE IFS="$OIFS" if [ "$LOGDIR" != "" ]; then # This is a hack, and might be innaccurate echo -n "Finds:" >&2 MONTHS=" January | February | March | April | May | June " MONTHS="$MONTHS| July | August | September | October " MONTHS="$MONTHS| November | December " egrep "$MONTHS" $LOGDIR/*.log | egrep "icon_smile|icon_happy|icon_camera" | wc -l >&2 fi 
fi } ############################################################################## # end #include "geo-common-gc" ############################################################################## ############################################################################## # begin #include "geo-common-gpsdrive" ############################################################################## # # default MySQL global options... # SQLUSER=gast SQLPASS=gast SQLDB=geoinfo SQLTAG=Geocache # # procedures for updating gpsdrive database via MySQL # # Global Vars: $SQLDB, $SQLTAG $OUTFILE # gpsdrive_purge() { delcmd="delete from waypoints" echo "use $SQLDB;" echo "$delcmd where type like '$SQLTAG%';" } gpsdrive_add() { delcmd="delete from waypoints" addcmd="replace into waypoints (name,lat,lon,type)" sqltag="$1" echo "use $SQLDB;" while read name lat lon type extra do name=`echo "$name" | tr -d "'"` # Primary key is autoincrementing id number, so delete # the old record (if any) by name and type if [ $PURGE = 0 ]; then echo "$delcmd where name='$name' and type like '$SQLTAG%';" fi if [ $DELETE = 0 ]; then # Add the new record if [ "$sqltag" = "Geocache" ]; then tag="$type" else tag="$sqltag" fi echo "$addcmd values ('$name','$lat','$lon','$tag');" fi done } gpsdrive_mysql() { if [ "$OUTFILE" != "" ]; then cat >> $OUTFILE elif [ $DEBUG -gt 0 ]; then cat else mysql -u$SQLUSER -p$SQLPASS fi } # # Extended list of gpsbabel output formats # gpsbabel_formats() { gpsbabel -? 
| sed -e '1,/File Types/d' -e '/Supported data filters/,$d' echo " gpsdrive.sql " \ "GpsDrive direct MySQL database insertion" echo " map[,geo-map-opts] " \ "Display map of waypoints using geo-map" } ############################################################################## # end #include "geo-common-gpsdrive" ############################################################################## gid2id() { gid="${1/#GC/}" if [ "$gid" "<" "G000" ]; then echo "$((0x$gid))" else ((val=0)) ((pos=0)) while ((pos < 4)); do digit=`expr index 0123456789ABCDEFGHJKMNPQRTVWXYZ "${gid:$pos:1}"` ((val=val*31+$digit-1)) ((++pos)) done echo "$((val-=411120))" fi } id2gid() { awk -v id="$1" ' BEGIN { gid = ""; if (id < 0) ; else if (id < 65536) gid = sprintf("GC%04X", id) else { GcOffset = 16 * 31 * 31 * 31 - 65536 GcSet = "0123456789ABCDEFGHJKMNPQRTVWXYZ" id += GcOffset; for (i = 1; i <= 4; ++i) { gid = substr(GcSet, id%31 + 1, 1) gid id = int(id / 31) } if (id) gid = "" else gid = "GC" gid } print gid } ' } # # Query the gc website # gc_query_init() { if [ $DEBUG -gt 0 ]; then TMP=/tmp/geo else TMP=/tmp/geo$$ fi HTMLPAGE=$TMP.page CIDFILE=$TMP.cids LOCFILE=$TMP.loc LOCFILE=/tmp/geo-soon.loc; rm -f $LOCFILE PIPEFILE=$TMP.ps TSFILE=$TMP.ts LOCTMP=$TMP.loctmp OUTWAY=$TMP.way CRUFT="$CRUFT $HTMLPAGE" CRUFT="$CRUFT $CIDFILE" #CRUFT="$CRUFT $LOCFILE" CRUFT="$CRUFT $LOCTMP" CRUFT="$CRUFT $TSFILE" CRUFT="$CRUFT $PIPEFILE" CRUFT="$CRUFT $OUTWAY" if [ $NOCOOKIES = 1 ]; then CRUFT="$CRUFT $COOKIE_FILE" fi # # Login to gc.com # gc_login "$USERNAME" "$PASSWORD" } test_cid() { _id=$1 debug 1 "lo=$lo hi=$hi test=$1" # # Grab a few important values from the page # get_value __EVENTTARGET $HTMLPAGE get_value __EVENTARGUMENT $HTMLPAGE get_value __VIEWSTATE $HTMLPAGE __VIEWSTATE=`urlencode "$__VIEWSTATE"` sleep 2 dbgcmd curl -s -b $COOKIE_FILE -A "$UA" \ -d __EVENTTARGET="$__EVENTTARGET" \ -d __EVENTARGUMENT="$__EVENTARGUMENT" \ -d __VIEWSTATE="$__VIEWSTATE" \ -d CID=$_id \ -d 
"Download=Download+Waypoints" \ "$URL" \ > $LOCTMP read hdr < $LOCTMP case "$hdr" in *html*) return 1;; *) return 0;; esac } gc_query_last() { lo=$1 hi=$2 logid=$(id2gid $lo) higid=$(id2gid $hi) debug 0 "Search for last submitted cache between $lo-$hi ($logid-$higid)" URL="$GEO/seek/nearest.aspx" URL="$URL$SEARCH" dbgcmd curl -L -s -b $COOKIE_FILE -A "$UA" \ "$URL" > $HTMLPAGE while : do if [ $hi -le $lo ]; then break fi cid=$(dc -e "$hi $lo +2/p") if test_cid $cid; then # ID exists lo=$(dc -e "$cid 1+p") else # ID doesn't exist hi=$(dc -e "$cid 1-p") fi done LASTID=$lo debug 0 "Last submitted cache is $LASTID ($(id2gid $LASTID))" } gc_gid_query() { # # We might combine one or more pages into a single XML, so cobble # up a header with the ?xml and loc tags. # cat <<-EOF > $LOCFILE EOF sleep 3 URL="$GEO/seek/nearest.aspx" URL="$URL$SEARCH" dbgcmd curl -L -s -b $COOKIE_FILE -A "$UA" \ "$URL" > $HTMLPAGE ((qcnt=(IDCNT+19)/20)) ((q=0)) ((i=0)) while ((q < qcnt)); do > $CIDFILE while ((i < IDCNT)); do id=${ID[i]} echo "-dCID=$id" >> $CIDFILE ((++i)) if ((i%20 == 0)); then break; fi done ((++q)) # # Grab a few important values from the page # get_value __EVENTTARGET $HTMLPAGE get_value __EVENTARGUMENT $HTMLPAGE get_value __VIEWSTATE $HTMLPAGE __VIEWSTATE=`urlencode "$__VIEWSTATE"` sleep 2 dbgcmd curl -s -b $COOKIE_FILE -A "$UA" \ -d __EVENTTARGET="$__EVENTTARGET" \ -d __EVENTARGUMENT="$__EVENTARGUMENT" \ -d __VIEWSTATE="$__VIEWSTATE" \ `cat $CIDFILE` \ -d "Download=Download+Waypoints" \ "$URL" \ | sed -e 's/^]*>//' \ -e 's/>[gG]eocacheGeocache]*>//' \ -e 's###' \ | tr '\303' 'A' \ > $LOCTMP read hdr < $LOCTMP case "$hdr" in *html*) debug 0 "Last ID was $id ($(id2gid $id))" echo "LASTID=$id" > $HOME/.geo-soon break; ;; *) cat $LOCTMP >> $LOCFILE ;; esac # # Check to see if the user hasn't agreed to license terms # if grep -s -q "STEP2=NO" $LOCFILE; then easy_warning >&2 remove_cruft exit fi done # # Finish off the .loc file # echo "" >> $LOCFILE # # Filter to our radius 
and convert to pipesep format # BABELFILT= if [ "$RADIUS" != "" ]; then BABELFILT="-x radius,distance=$RADIUS,lat=$LAT,lon=$LON" fi dbgcmd gpsbabel $BABELFLAGS \ -i geo$GEONUKE -f $LOCFILE \ $BABELFILT -o tabsep -F- | tr ' ' '|' > $PIPEFILE BABELFILT= # # Fetch each cache page to see if it exists # OIFS="$IFS" IFS="|" while read index shortname description notes url urltext icon lat lon \ lat32 lon32 latdecdir londecdir latdirdec londirdec latdir londir \ altfeet altmeters excel timet diff terr container type extra; do #url="http://www.geocaching.com/seek/cache_details.aspx?wp=GCJMA5" sleep 1 dbgcmd curl -s -A "$UA" "$url" > $HTMLPAGE msg="This cache listing has not been approved yet" if grep -q "$msg" $HTMLPAGE; then echo -e "$index \c" echo -e "$shortname \c" echo -e "$description \c" echo -e "$notes \c" echo -e "$url \c" echo -e "$urltext \c" echo -e "$icon \c" echo -e "$lat \c" echo -e "$lon \c" echo fi done < $PIPEFILE > $TSFILE IFS="$OIFS" # # Convert to the desired format # if [ $SQL = 1 ]; then # # add it via mysql # if [ "$OUTFILE" != "" ]; then >"$OUTFILE" fi if [ $PURGE = 1 ]; then gpsdrive_purge | gpsdrive_mysql PURGE=2 fi dbgcmd gpsbabel $BABELFLAGS \ -i tabsep -f $TSFILE \ $BABELFILT -o "$OUTFMT" -F $OUTWAY gpsdrive_add <$OUTWAY $SQLTAG | gpsdrive_mysql elif [ $MAP = 1 ]; then dbgcmd gpsbabel $BABELFLAGS \ -i tabsep -f $TSFILE \ $BABELFILT -o "$OUTFMT" -F $OUTWAY if [ "$OUTFILE" = "" ]; then dbgcmd geo-map -s0 $MAPOPTS -t$OUTWAY else dbgcmd geo-map -s0 $MAPOPTS -t$OUTWAY -o"$OUTFILE" fi else # # output to stdout or to a file # if [ "$OUTFILE" = "" ]; then OUTFILE=/dev/fd/1 fi dbgcmd gpsbabel $BABELFLAGS \ -i tabsep -f $TSFILE \ $BABELFILT -o "$OUTFMT" -F $OUTFILE fi } # # Set default options, can be overriden on command line or in rc file # UPDATE_URL=$WEBHOME/geo-nearest UPDATE_FILE=geo-nearest.new read_rc_file # # Process the options # SQLTAG=Geocache-soon RADIUS=35 NUM=1000 STARTID= gc_getopts "$@" shift $? 
#
# Main program
#
# Positional arguments (after option processing) select the search origin:
#   6 args - lat/lon pasted from a geocaching.com cache page
#   2 args - lat and lon in any format latlon() understands
#   0 args - use the LAT/LON defaults (script defaults or rc file)
# Anything else prints the usage message.
#
case "$#" in
6)
    # Cut and paste from geocaching.com cache page
    # N 44° 58.630 W 093° 09.310
    # Glue the pieces into N44.58.630 form; tr strips the bytes
    # \302 \260 (the UTF-8 encoding of the degree sign).
    LAT=$(printf '%s\n' "$1$2.$3" | tr -d '\260\302')
    LAT=$(latlon $LAT)
    LON=$(printf '%s\n' "$4$5.$6" | tr -d '\260\302')
    LON=$(latlon $LON)
    SEARCH="?origin_lat=$LAT&origin_long=$LON"
    ;;
2)
    # Deliberately unquoted: latlon() picks the input format from its
    # argument count, so an argument containing blanks is split here
    # exactly as it was before.
    LAT=$(latlon $1)
    LON=$(latlon $2)
    SEARCH="?origin_lat=$LAT&origin_long=$LON"
    ;;
0)
    SEARCH="?origin_lat=$LAT&origin_long=$LON"
    ;;
*)
    usage
    ;;
esac

#
# Init vars and get a viewstate
#
gc_query_init

#
# Figure out where to start search from.  Cache the last ID in
# a file.
#
# The cache file is a shell fragment of the form "LASTID=<n>" and is
# sourced below.  When it is more than 120 minutes old (or was just
# created with an epoch timestamp) the last ID is searched for again.
#
# Use the platform-selected touch command ($touch is gtouch on Darwin)
# because "-d <date string>" is GNU syntax; fall back to plain touch
# if the variable is not set.
#
touch_cmd="${touch:-touch}"

LASTSOON=$HOME/.geo-soon
if [[ ! -f "$LASTSOON" ]]; then
    "$touch_cmd" -d "1/1/70" "$LASTSOON"
fi

TIMESTAMP=${TMP}-timestamp
CRUFT="$CRUFT $TIMESTAMP"
"$touch_cmd" -d "120 minutes ago" "$TIMESTAMP"

. "$LASTSOON"
if [[ "$LASTSOON" -ot "$TIMESTAMP" ]]; then
    if [[ -z "$LASTID" ]]; then
        LASTID=200000
    fi
    ((LASTID=LASTID-1))
    # NOTE(review): the upper search bound 220000 is hard-coded; once
    # LASTID reaches it gc_query_last has nothing left to search.
    gc_query_last "$LASTID" 220000
    echo "LASTID=$LASTID" > "$LASTSOON"
fi

#
# Build the list of NUM consecutive cache IDs ending just below LASTID
# and hand it to gc_gid_query via the globals ID[] and IDCNT.
#
((STARTID=LASTID-NUM))
debug 0 "Starting search from $LASTID-$NUM=$STARTID ($(id2gid $STARTID))"
id="$STARTID"

# Start from a clean array and an explicit index of 0: gc_gid_query
# reads ID[0]..ID[IDCNT-1], so a stale or inherited value of "i" must
# not shift where the IDs are stored.
ID=()
for ((i = 0; i < NUM; ++i)); do
    ID[i]=$((STARTID + i))
done
IDCNT=$i

gc_gid_query