import java.net.*;
import java.io.*;

// PageSaver downloads the page behind a URL and stores it in a local file
// named after the remote file, passing every tag through convertTag on the way.
public class PageSaver {

    // The page this instance downloads.
    URL theURL;

    // Treats every command line argument as a URL and saves the page it names.
    // An argument that cannot be parsed as a URL is reported on System.err
    // and skipped; the remaining arguments are still processed.
    public static void main (String args[]) {

        // Loop through the command line arguments
        for (int i = 0; i < args.length; i++) {
            // Open the URL for reading
            try {
                URL root = new URL(args[i]);
                PageSaver ps = new PageSaver(root);
                ps.saveThePage();
            }
            catch (MalformedURLException e) {
                System.err.println(args[i] + " is not a parseable URL");
                System.err.println(e);
            }
        } // end for

    } // end main

    // Creates a PageSaver for the page at URL u.
    public PageSaver(URL u) {
        theURL = u;
    }

    // saveThePage opens a DataInputStream from the URL,
    // opens a PrintStream onto a file for the output,
    // and then copies one to the other while rewriting tags.
    // Bytes outside of tags are copied unchanged; each tag is read with
    // readTag, rewritten with convertTag and then printed.
    // Any failure is reported on System.err; nothing is thrown to the caller.
    public void saveThePage() {

        DataInputStream theHTML = null;
        PrintStream p = null;

        try {
            theHTML = new DataInputStream(theURL.openStream());
            p = makeOutputFile();

            // read() returns the next byte as 0-255, or -1 at the end of the
            // page, so no sign extension occurs and no EOFException is needed
            // to leave the loop.
            int thisByte;
            while ((thisByte = theHTML.read()) != -1) {
                if (thisByte == '<') {
                    String theTag = readTag(theHTML);
                    theTag = convertTag(theTag);
                    p.print(theTag);
                }
                else {
                    // write the raw byte so that non-ASCII content is not
                    // re-encoded on its way to the file
                    p.write(thisByte);
                }
            } // end while
        } // end try
        catch (Exception e) {
            System.err.println(e);
        }
        finally {
            // Either stream may still be null if opening it failed.
            if (p != null) {
                p.close();
            }
            if (theHTML != null) {
                try {
                    theHTML.close();
                }
                catch (IOException e) {
                    System.err.println(e);
                }
            }
        }

    } // end saveThePage

    // We need to open a file on the local file system
    // with the same name as the remote file;
    // then chain a PrintStream to the file.
    // Returns the PrintStream; throws IOException if the file cannot be opened.
    public PrintStream makeOutputFile() throws IOException {

        String theFile = theURL.getFile();

        // the getFile method returns the filename prefixed with a slash,
        // e.g. /index.html instead of index.html. That slash needs to be removed.
        // For a URL without any path (e.g. http://www.example.com) getFile
        // returns the empty string, so only strip a slash that is really there.
        if (theFile.startsWith("/")) {
            theFile = theFile.substring(1);
        }
        System.err.println("\n\n\n" + theFile + "\n\n\n");
        if (theFile.equals("")) theFile = "index.html";

        // At this point you should check to see whether
        // the file already exists and, if it does,
        // ask the user if they wish to overwrite it
        FileOutputStream fout = new FileOutputStream(theFile);
        return new PrintStream(fout);

    }

    // The readTag method is called when a < is encountered in the input stream.
This method is responsible // for reading the remainder of the tag. // Note that when this method has been called the < // has been read from the input stream but has not yet been sent // to the output stream. // This method has trouble (as do most web browsers) // if it encounters a raw < sign in the Stream. Technically // raw < signs should be encoded as < in the original HTML. public static String readTag(DataInputStream is) { StringBuffer theTag = new StringBuffer("<"); char theChar = '<'; try { while (theChar != '>') { theChar = (char) is.readByte(); theTag.append(theChar); } // end while } // end try catch (EOFException e) { // Done with the Stream } catch (Exception e) { System.err.println(e); } return theTag.toString(); } // The convertTag method takes a complete tag as // a String and, if it's a relative link, converts it // to an absolute link. The converted tag is returned. public String convertTag(String tag) { // temporary position variables int p1, p2, p3, p4; try { // HTML tags are cases insensitive so converting // it to upper case makes the problem slightly easier String s1 = tag.toUpperCase(); // Find the beginning and the end of the URL // if (s1.startsWith("