Class BinaryLinkExtractor
java.lang.Object
|
+----BinaryLinkExtractor
- public class BinaryLinkExtractor
- extends Object
NetRand Project
Software Engineering - CS536
University of Wisconsin - Milwaukee
Authors:
- Spring 1998 - Francis William Kasper
File: BinaryLinkExtractor.java
BinaryLinkExtractor extracts links to binary files from HTML files.
-
cacheFile
- the cache file
-
tmp_dir
- the name of the directory to store the cache file
-
url
- the url of the HTML to parse
-
urlConn
- handles the URL connection
-
BinaryLinkExtractor()
-
constructor for the BinaryLinkExtractor
-
cacheHTMLDocument(String)
- gets the HTML file from the remote server and writes it to a local cache
file
-
deleteCacheDocument()
- deletes the cache file
-
extract(String, String)
- extracts the text within quotes from a line immediately after a given key
-
extractBinaryLinks(String)
- calls the appropriate functions to extract the binary file links from
the given URL.
-
isBaseTag(String)
-
-
isBinaryFile(String)
-
-
isHypertextLink(String)
-
-
isImageSourceTag(String)
-
-
readTag(FileInputStream)
- parses for an HTML tag in an HTML file
url
private URL url
- the url of the HTML to parse
urlConn
private URLConnection urlConn
- handles the URL connection
tmp_dir
private String tmp_dir
- the name of the directory to store the cache file
cacheFile
private File cacheFile
- the cache file
BinaryLinkExtractor
public BinaryLinkExtractor()
- constructor for the BinaryLinkExtractor
extractBinaryLinks
public LinkedList extractBinaryLinks(String urlString)
- calls the appropriate functions to extract the binary file links from
the given URL.
- Parameters:
- urlString - - the target URL
- Returns:
- a LinkedList containing the URLs to the found binary files
cacheHTMLDocument
private String cacheHTMLDocument(String urlString) throws IOException
- gets the HTML file from the remote server and writes it to a local cache
file
- Parameters:
- urlString - - the URL of the target HTML file
- Throws: IOException
- if there was an error reading the HTML file
deleteCacheDocument
private void deleteCacheDocument()
- deletes the cache file
readTag
private String readTag(FileInputStream theHTML)
- parses for an HTML tag in an HTML file
- Parameters:
- theHTML - - the HTML file's input stream
- Returns:
- a String containing the HTML tag, or null if no tag was found
extract
private String extract(String line,
String key)
- extracts the text within quotes from a line immediately after a given key
- Parameters:
- line - - the line to extract the text from
- key - - the key to search for
- Returns:
- a String containing the extracted text
isBaseTag
private boolean isBaseTag(String tag)
- Parameters:
- tag - - the HTML tag to test
- Returns:
- true iff the tag represents the HTML BASE tag, otherwise false
isHypertextLink
private boolean isHypertextLink(String tag)
- Parameters:
- tag - - the HTML tag to test
- Returns:
- true iff the HTML tag contains a hypertext link, otherwise false
isImageSourceTag
private boolean isImageSourceTag(String tag)
- Parameters:
- tag - - the HTML tag to test
- Returns:
- true iff the HTML tag is an IMG SRC tag, otherwise false
isBinaryFile
private boolean isBinaryFile(String link)
- Parameters:
- link - - the hypertext link to test
- Returns:
- true iff the link references a binary file, otherwise false