/* parse HTML to URL list */
/*
 * Usage:  rx <this script> FILE <html file> URLBASE <base url>
 *
 * Scans <html file> for anchor tags and prints one URL per line:
 *   - absolute http:// URLs are printed unchanged,
 *   - URLs starting with "/" are prefixed with the root of URLBASE
 *     (protocol://host),
 *   - any other URL is appended to URLBASE as given (URLBASE is expected
 *     to end with "/"),
 *   - URLs ending in "/" (directories) are skipped for now.
 *
 * NOTE(review): the previous revision of this file was truncated in two
 * places (the anchor search / quoted-href branch, and the head of the
 * DebugSay routine).  Those parts are reconstructed here from the pieces
 * that survived; confirm against the original script if it is available.
 */
parse arg "FILE"filename "URLBASE"urlbase
_debug=0                      /* 0 = quiet, 1 = trace messages, 2 = trace and pause */
filename=strip(filename)
urlbase=strip(urlbase)

/* Split "proto://host/path/" so that urlbaseroot becomes "proto://host". */
parse value urlbase with protocol ":" option1 "/" option2 "/" root "/" .
urlbaseroot=protocol||":"||option1||"/"||option2||"/"||root
call DebugSay urlbaseroot||"/"

if ~open("in",filename,"R") then
do
  say "unable to open file :" filename
  exit 0                      /* exit code kept at 0 as in the original */
end

/* Text not yet consumed.  Must start empty: an unassigned Rexx symbol
   evaluates to its own name and would pollute the first line. */
buffer=""

do while ~eof("in")
  /* A line break is plain white space in HTML, so join lines with a blank.
     This also guarantees that an unquoted URL is always terminated. */
  buffer=buffer||readln("in")||" "

  do forever
    apos=pos("<A ",upper(buffer))
    if apos=0 then
    do
      buffer=""               /* no anchor in what we have: nothing to keep */
      leave
    end
    buffer=substr(buffer,apos)          /* buffer now starts at the anchor */

    hrefpos=pos("HREF=",upper(buffer))
    tagend=pos(">",buffer)
    if hrefpos=0 | (tagend>0 & tagend<hrefpos) then
    do
      if tagend=0 then leave            /* tag still open: read more input */
      buffer=substr(buffer,tagend+1)    /* anchor without href (e.g. name=) */
      iterate
    end

    rest=substr(buffer,hrefpos+5)       /* text right after "href=" */
    quote=left(rest,1)
    if quote='"' | quote="'" then
    do
      /* href="..." or href='...' : the URL ends at the matching quote */
      rest=substr(rest,2)
      closingpos=pos(quote,rest)
    end
    else
    do
      call DebugSay 'Assuming url is given in href=...[space or <] type'
      /* First blank or '>' ends the URL.  A plain min() of both positions
         is wrong: it yields 0 as soon as one of the two is missing. */
      blankpos=pos(' ',rest)
      gtpos=pos('>',rest)
      if blankpos=0 then closingpos=gtpos
      else if gtpos=0 then closingpos=blankpos
      else closingpos=min(blankpos,gtpos)
    end
    if closingpos=0 then leave          /* URL continues on the next line */

    /* We finally have our url */
    url=left(rest,closingpos-1)
    buffer=substr(rest,closingpos+1)    /* keep scanning after this URL */
    call DebugSay url
    call EmitUrl url
  end
end

call close "in"
exit 0

/* Print the URL in absolute form; directory URLs are skipped. */
EmitUrl: procedure expose urlbase urlbaseroot
  parse arg url
  if right(url,1)='/' then
  do
    /* it's a dir : what to do next ? */
    /* call a recursive download ? working on it later */
    return
  end
  /* TODO: support ftp:// mms:// and so on */
  if upper(left(url,7))~="HTTP://" then
  do
    /* relative URL */
    if left(url,1)="/" then
      say urlbaseroot||url    /* relative to the root of the urlbase */
    else
      say urlbase||url
  end
  else
    say url                   /* absolute url */
  return

/* Trace helper.  Must be invoked with CALL: used as a bare expression its
   return value would be sent to the host as a command.
   NOTE(review): only "... 1) then / Pull / return _debug" survived of the
   original routine; the say threshold below is an assumption. */
DebugSay: procedure expose _debug
  parse arg text
  if _debug>=1 then say text
  if _debug>1 then pull       /* pause until the user presses return */
  return _debug