# Copyright (c) 1994 Regents of the University of California.
# All rights reserved.
# $Id: momevent.pl,v 1.7 1994/08/10 10:18:29 fielding Exp $
# ---------------------------------------------------------------------------
# momevent: A package for signaling traversal events to other packages
#           during a spider's progression through the World-Wide Web.
#           This provides a nice place for hooks for additional programs
#           that may be interested in the structure of a web.
#
# Note: This package is heavily dependent on the @Task queues set within
#       the main package.  There is no convenient way to avoid it.
#
# This software has been developed by Roy Fielding as
# part of the Arcadia project at the University of California, Irvine.
#
# Redistribution and use in source and binary forms are permitted,
# subject to the restriction noted below, provided that the above
# copyright notice and this paragraph and the following paragraphs are
# duplicated in all such forms and that any documentation, advertising
# materials, and other materials related to such distribution and use
# acknowledge that the software was developed in part by the University of
# California, Irvine.  The name of the University may not be used to
# endorse or promote products derived from this software without
# specific prior written permission.  THIS SOFTWARE IS PROVIDED ``AS IS''
# AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
# LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE.
#
# Use of this software in any way or in any form, source or binary,
# is not allowed in any country which prohibits disclaimers of any
# implied warranties of merchantability or fitness for a particular
# purpose or any disclaimers of a similar nature.
#
# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
# ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION
# (INCLUDING, BUT NOT LIMITED TO, LOST PROFITS) EVEN IF THE UNIVERSITY
# OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# If you have any suggestions, bug reports, fixes, or enhancements,
# send them to the author Roy Fielding at .
# ---------------------------------------------------------------------------
require "wwwdates.pl";
require "momconfig.pl";
require "momindex.pl";
require "momemail.pl";

package momevent;

# ==========================================================================
# Get defaults from momconfig.pl

$I_am = $momconfig'Version;     # program identification used in the banners

# ==========================================================================
# ==========================================================================
# begin_program(): Announce that the program has started running.  The
#                  original notes say this is called before the
#                  instructions are read.
#
#   The banner has the form "<version> starting at <local time>(<GMT>)",
#   where the GMT part is the GMT timestamp with its first five characters
#   removed.  It is always printed to the currently selected output handle.
#
#      $exterr -> When true, the same banner is also written to STDERR
#                 (the caller is expected to set this when STDERR is
#                 being sent to a file).
#
sub begin_program
{
    local($exterr) = @_;
    local($now)    = time;      # one clock reading shared by both timestamps

    # Local-time half of the banner; wtime() is evaluated inside join's list.
    local($local_part) = join(' ', $I_am, 'starting at',
                              &wwwdates'wtime($now, ''));

    # GMT half, minus the first five characters of the formatted date.
    local($gmt_part) = substr(&wwwdates'wtime($now, 'GMT'), 5);

    local($banner) = $local_part . '(' . $gmt_part . ")\n";

    print $banner;
    print STDERR $banner if ($exterr);
}


# ==========================================================================
# begin_infostruct(): Announce the start of an infostructure traversal --
#                     a task that should allow the spider to traverse
#                     through the nodes within the structure and only
#                     test nodes linked to but outside of it.
#
#      $task -> The @Task index number for the current task.
#
#  Returns: 1 = okay
#           0 = abort this task (momindex'start reported failure; in that
#               case momemail'start is never called)
#
sub begin_infostruct
{
    local($task) = @_;
    local($index_ok);

    print 'Starting Infostructure ', $main'TaskName[$task], ' at ',
          &wwwdates'wtime(time,''), "\n";

    # Open the index for this task first; everything else depends on it.
    $index_ok = &momindex'start($main'TaskIndexFile[$task],
                                $main'TaskIndexURL[$task],
                                $main'TaskIndexTitle[$task],
                                $main'TaskChangeWindow[$task],
                                $main'TaskExpireWindow[$task]);
    if (!$index_ok) { return 0; }

    # Index is ready, so set up the e-mail notification side as well.
    &momemail'start($main'TaskName[$task],
                    $main'TaskEmailBroken[$task],
                    $main'TaskEmailRedirected[$task],
                    $main'TaskEmailChanged[$task],
                    $main'TaskEmailExpired[$task]);

    return 1;
}


# ==========================================================================
# begin_traversed(): Caller signals that the given node has been traversed
#                    as part of the current traversal process.  This means
#                    that the node was GET'd and then (if possible) parsed
#                    for child links.  The process is now going to work on
#                    testing the child links (if any) until end_traversed.
#
#   The event is forwarded to momindex'traversed and momemail'tested.
#
#      $node     -> The momhistory index number for the affected URL
#      $parent   -> The URL of this node's parent (accepted, not used here)
#      $response -> Third argument supplied by the caller (accepted, not
#                   used here)
#
sub begin_traversed
{
    local($node, $parent, $response) = @_;

    &momindex'traversed($node);     # record the traversal in the index
    &momemail'tested($node);        # let the mail module see the node too
}


# ==========================================================================
# tested(): Caller signals that the given node has been tested as part of
#           the current traversal process.  The event is forwarded to
#           momindex'tested and momemail'tested.
#
#      $node   -> The momhistory index number for the affected URL
#      $parent -> The URL of this node's parent (accepted, not used here)
#      $abs    -> This node's absolute version of orig URL
#      $orig   -> This node's original URL as it appeared in parent HTML.
#      $type   -> The type of reference made to the URL
#                 (L=link, I=image, Q=query, R=redirection).
#      $reused -> If $reused, the test results were obtained from
#                 our history information rather than from a new
#                 network request (i.e. we just saved network bandwidth).
#
sub tested
{
    local($node, $parent, $abs, $orig, $type, $reused) = @_;

    # The index wants the full description of the reference ...
    &momindex'tested($node, $abs, $orig, $type, $reused);

    # ... while the mail module only needs to know which node it was.
    &momemail'tested($node);
}


# ==========================================================================
# end_traversed(): Caller signals the end of an HTML node traversal.
#                  Prints a progress line; nothing else is done here.
#
#      $task -> The @Task index number for the current task
#               (accepted, not used here).
#      $url  -> The URL that has just finished being traversed.
#      $left -> Count reported as "remaining on queue" in the message.
#
sub end_traversed
{
    local($task, $url, $left) = @_;

    print "Done Traversing $url\nat ",
          &wwwdates'wtime(time,''),
          "-- $left remaining on queue\n";
}


# ==========================================================================
# end_infostruct(): Caller signals the end of an infostructure traversal.
#                   Closes out the e-mail side first, then the index, and
#                   finally prints a completion line.
#
#      $task -> The @Task index number for the current task.
#
sub end_infostruct
{
    local($task) = @_;

    &momemail'end($main'TaskEmailAddress[$task],
                  $main'TaskIndexTitle[$task],
                  $main'TaskIndexURL[$task]);

    &momindex'end($main'TaskIndexFile[$task]);

    print 'Finished Infostructure ', $main'TaskName[$task], ' at ',
          &wwwdates'wtime(time,''), "\n\n";
}


# ==========================================================================
# end_program(): Caller signals the end of program execution.  Prints the
#                process summary obtained from momindex, then a closing
#                banner "<version> finished at <local time>(<GMT>)".
#
#      $exterr -> When true, the closing banner is also written to STDERR
#                 (the caller is expected to set this when STDERR is
#                 being sent to a file).
#
sub end_program
{
    local($exterr) = @_;

    # NOTE: get_procsum is deliberately called as "&name;" with no
    # parentheses, exactly as before; in that form Perl hands the callee
    # this sub's current @_ rather than an empty argument list.
    print "Summary of Process Results:\n", &momindex'get_procsum;

    local($now) = time;         # one clock reading shared by both timestamps

    # Local-time half of the banner; wtime() is evaluated inside join's list.
    local($local_part) = join(' ', $I_am, 'finished at',
                              &wwwdates'wtime($now, ''));

    # GMT half, minus the first five characters of the formatted date.
    local($gmt_part) = substr(&wwwdates'wtime($now, 'GMT'), 5);

    local($banner) = $local_part . '(' . $gmt_part . ")\n";

    print $banner;
    print STDERR $banner if ($exterr);
}

# ==========================================================================

1;