/*
 * Parse an HTML file into a list of URLs, printing each URL found with Say.
 *
 * Arguments (parse arg template): FILE filename URLBASE urlbase
 *   filename - path of the HTML file to read; surrounding blanks are stripped.
 *   urlbase  - base URL that is prepended to relative links; surrounding
 *              blanks are stripped.
 *
 * What the visible code does:
 *   - urlbase is split at the first colon and the next three slashes, and
 *     urlbaseroot is rebuilt from the four leading parts. For a base of the
 *     form scheme, colon, two slashes, host, slash, path this yields the
 *     scheme and host without a trailing slash.
 *   - The file is opened for reading under the logical name in. When the open
 *     fails, a message is printed and the script exits with return code 0.
 *   - Lines are appended to the variable line until end of file.
 *   - In the one href branch that is still visible (value not quoted, ended
 *     by a blank or a greater-than sign), the first character of line is
 *     dropped and more lines are read while closingpos is 0 and end of file
 *     has not been reached. The URL is the text left of closingpos.
 *   - A URL ending in a slash is treated as a directory and nothing is
 *     printed for it (the branch is empty apart from comments).
 *   - A URL that does not start with the seven-character http scheme prefix
 *     is treated as relative: with a leading slash it is printed after
 *     urlbaseroot, otherwise after urlbase. Any other URL is printed as is.
 *
 * NOTE(review): this text looks damaged and is not runnable as it stands.
 *   - The call that assigns apos opens a string literal that is never closed,
 *     and the statements that should follow it are absent up to a stray
 *     comment terminator; presumably lost during extraction - TODO restore
 *     from a pristine copy.
 *   - The tail of the file is an unterminated comment followed by what looks
 *     like the remains of the DebugSay routine (a Pull and a return of
 *     _debug); its real body is not visible here.
 *   - All clauses sit on one physical line without semicolons, so the line
 *     breaks of the original script appear to have been lost as well.
 *   - Both calls to min over two POS results lack a closing parenthesis.
 *   - POS returns 0 when the needle is absent, so min over the two results is
 *     0 whenever either delimiter is missing; the inner loop then keeps
 *     reading until both a blank and a greater-than sign are present.
 *     Confirm whether that is intended.
 *   - The variable pos is set to 0 but never read in the visible text.
 *   - No close of the input file is visible.
 *   - Exiting with 0 after a failed open reports success to the caller;
 *     confirm whether a non-zero code is wanted.
 */ parse arg "FILE"filename "URLBASE"urlbase _debug=0 filename=strip(filename) urlbase=strip(urlbase) parse Value urlbase with protocol ":" option1 "/" option2 "/" root "/" . urlbaseroot=protocol||":"||option1||"/"||option2||"/"||root DebugSay(urlbaseroot||"/") ok=open("in",filename,r) if ~ok then do Say "unable to open file :" filename exit 0 end pos=0 DO while ~EOF("in") line=line||readln("in") apos=POS("' */ DebugSay('Assuming url is given in href=...[space or <] type') line=substr(line,2) closingpos=min(POS(' ',line),POS('>',line) DO while (closingpos==0)&(~EOF('in')) line=line||readln('in') closingpos=min(POS(' ',line),POS('>',line) END end /* We finally have our url */ url=LEFT(line,closingpos-1) DebugSay(Url) if right(url,1)='/' then do /* it's a dir : what to do next ? */ /* call a recursive download ? working on it later */ end else do if left(url,7)~="http://" then /* TODO: support ftp:// mms:// and so on */ do /* relative URL */ if left(url,1)="/" then do /* relative to the root of the urlbase */ Say urlbaseroot||url end else do Say urlbase||url end end else do /*absolute url*/ Say Url end end end /* href= found */ end /* 1) then Pull return _debug