直接上代码
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads a web page and extracts pieces of it with regular expressions.
 *
 * <p>Every extraction method concatenates all regex matches found in the input and then
 * removes anything that looks like a markup tag ({@code <...>}) from the result.
 */
public class TestHtml {

    /** Charset used to decode the downloaded page. */
    private static final String PAGE_CHARSET = "gbk";

    /**
     * Matches a complete title element.
     *
     * <p>NOTE(review): the literal was garbled in the source (its markup had been stripped and
     * the string was split across two lines); it is reconstructed here as
     * {@code <title>.*?</title>} based on the method's documented purpose - confirm.
     */
    private static final Pattern TITLE_PATTERN =
            Pattern.compile("<title>.*?</title>", Pattern.CANON_EQ);

    /** Matches a single markup tag, i.e. the shortest {@code <...>} run. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<.*?>");

    /**
     * Reads the entire content of a web page.
     *
     * <p>Lines are appended without their line terminators, so the returned text is a single
     * line. The extraction methods rely on this: their patterns are compiled without
     * {@code DOTALL}, so {@code .} would not match across line breaks.
     *
     * @param htmlurl absolute URL of the page to download
     * @return the page content decoded as GBK, with line terminators removed
     * @throws MalformedURLException if {@code htmlurl} is not a valid URL (a hint is also
     *         printed to standard output)
     * @throws IOException if the page cannot be opened or read
     */
    public String getOneHtml(final String htmlurl) throws IOException {
        final StringBuilder sb = new StringBuilder();
        try {
            final URL url = new URL(htmlurl);
            final BufferedReader in =
                    new BufferedReader(new InputStreamReader(url.openStream(), PAGE_CHARSET));
            try {
                String temp;
                while ((temp = in.readLine()) != null) {
                    sb.append(temp);
                }
            } finally {
                // Close the connection even when reading fails part-way through.
                in.close();
            }
        } catch (final MalformedURLException me) {
            System.out.println("你输入的URL格式有问题!请仔细输入");
            throw me;
        }
        return sb.toString();
    }

    /**
     * Extracts the page title.
     *
     * @param s HTML text to search
     * @return the text of every title element found, concatenated and with tags removed;
     *         the empty string if there is none
     */
    public String getTitle(final String s) {
        return extract(s, TITLE_PATTERN);
    }

    /**
     * Extracts everything that matches a caller-supplied regular expression.
     *
     * @param s text to search
     * @param regexarg regular expression to look for (compiled with
     *        {@link Pattern#CANON_EQ})
     * @return all matches concatenated in order of appearance, with tags removed;
     *         the empty string if nothing matches
     */
    public String getByRegex(final String s, String regexarg) {
        return extract(s, Pattern.compile(regexarg, Pattern.CANON_EQ));
    }

    /**
     * Removes every markup tag from the given text.
     *
     * <p>NOTE(review): this method was called but not defined in the source; it is
     * reconstructed as a plain tag stripper - confirm against the original implementation.
     *
     * @param s text that may contain tags
     * @return {@code s} with every {@code <...>} run removed
     */
    public String outTag(final String s) {
        return TAG_PATTERN.matcher(s).replaceAll("");
    }

    /** Concatenates every match of {@code pattern} in {@code s} and strips tags from the result. */
    private String extract(final String s, final Pattern pattern) {
        final StringBuilder matches = new StringBuilder();
        final Matcher ma = pattern.matcher(s);
        while (ma.find()) {
            matches.append(ma.group());
        }
        return outTag(matches.toString());
    }

    /**
     * Demo entry point: downloads a fixed product page and prints what the sample regex extracts.
     *
     * @param args unused
     */
    public static void main(final String args[]) {
        final String url = "http://detail.1688.com/offer/41797007099.html?tracelog=p4p";
        try {
            final TestHtml testHtml = new TestHtml();
            final String html = testHtml.getOneHtml(url);
            // NOTE(review): this expression looks truncated (markup probably stripped when the
            // code was published); kept as-is because the intended pattern cannot be recovered.
            final String regex = " .*?";
            final String content = testHtml.getByRegex(html, regex);
            System.out.println("contet is :" + content);
        } catch (final Exception e) {
            // Report the failure instead of silently discarding it.
            System.err.println("Failed to fetch or parse " + url + ": " + e);
        }
    }
}