利用 .NET 框架提供的 WebClient 类和 WebRequest 类,可以很容易地获取给定 URL 地址的网页源代码。以下是一个完整的 C# 示例。
GetPageHtml.aspx
<%@ Page language="c#" validateRequest="false" Codebehind="GetPageHtml.aspx.cs" AutoEventWireup="false" Inherits="eMeng.Exam.GetPageHtml" %>
<%--
    Demo page: the user types a URL into UrlText, presses one of the buttons, and the
    code-behind class eMeng.Exam.GetPageHtml writes the downloaded content into ContentHtml.

    validateRequest is switched off in the directive above; presumably this is needed
    because ContentHtml is posted back with the form and may contain fetched HTML markup.
--%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
<HTML>
    <HEAD>
        <title>得到网页源代码</title>
        <meta name="GENERATOR" Content="Microsoft Visual Studio 7.0">
        <meta name="CODE_LANGUAGE" Content="C#">
        <meta name="vs_defaultClientScript" content="JavaScript">
        <meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
    </HEAD>
    <body MS_POSITIONING="GridLayout">
        <form id="aspNetBuffer" method="post" runat="server">
            <div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
            <%-- The default URL is given through the Text attribute so that no stray
                 line break from the markup can end up inside the value. --%>
            <asp:TextBox id="UrlText" runat="server" Width="400px" Text="http://dotnet.aspx.cc/content.aspx"></asp:TextBox>
            <asp:Button id="WebClientButton" runat="server" Text="用WebClient得到"></asp:Button>
            <asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到"></asp:Button>
            <%-- The code-behind declares a GetText button and subscribes to its Click
                 event; the control has to exist here or that field stays null. --%>
            <asp:Button id="GetText" runat="server" Text="得到网页文本"></asp:Button>
            <br>
            <asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine"></asp:TextBox>
        </form>
    </body>
</HTML>
GetPageHtml.aspx.cs
- using System;
- using System.Collections;
- using System.ComponentModel;
- using System.Data;
- using System.Drawing;
- using System.Web;
- using System.Web.SessionState;
- using System.Web.UI;
- using System.Web.UI.WebControls;
- using System.Web.UI.HtmlControls;
- using System.IO;
- using System.Net;
- using System.Text;
- using System.Text.RegularExpressions;
namespace eMeng.Exam
{
    /// <summary>
    /// Code-behind for GetPageHtml.aspx. Downloads the resource at the URL typed into
    /// <c>UrlText</c> and shows the result in <c>ContentHtml</c>, using either
    /// <see cref="WebClient"/> or <see cref="WebRequest"/>.
    /// </summary>
    /// <remarks>
    /// Downloaded bytes are decoded with <see cref="Encoding.Default"/> (the server's
    /// ANSI code page), exactly as the original sample did; pages served in another
    /// encoding will not decode correctly.
    /// Network failures are not caught here and propagate to the ASP.NET error page.
    /// </remarks>
    public class GetPageHtml : System.Web.UI.Page
    {
        protected System.Web.UI.WebControls.Button WebClientButton;
        protected System.Web.UI.WebControls.Button WebRequestButton;
        protected System.Web.UI.WebControls.TextBox ContentHtml;
        protected System.Web.UI.WebControls.TextBox UrlText;
        protected System.Web.UI.WebControls.Button GetText;

        /// <summary>Page load handler; intentionally empty.</summary>
        private void Page_Load(object sender, System.EventArgs e)
        {
        }

        #region Web Form Designer generated code
        /// <summary>Wires up the event handlers before the base page initializes.</summary>
        override protected void OnInit(EventArgs e)
        {
            InitializeComponent();
            base.OnInit(e);
        }

        /// <summary>
        /// Required method for Designer support: subscribes the button and page
        /// event handlers.
        /// </summary>
        private void InitializeComponent()
        {
            this.WebClientButton.Click += new System.EventHandler(this.WebClientButton_Click);
            this.WebRequestButton.Click += new System.EventHandler(this.WebRequestButton_Click);

            // The GetText field is only populated when the .aspx markup declares a
            // control with that id; without the guard a missing control makes every
            // request fail with a NullReferenceException.
            if (this.GetText != null)
            {
                this.GetText.Click += new System.EventHandler(this.GetText_Click);
            }

            this.Load += new System.EventHandler(this.Page_Load);
        }
        #endregion

        /// <summary>
        /// Downloads the target URL with <see cref="WebClient"/> and displays the
        /// decoded content.
        /// </summary>
        private void WebClientButton_Click(object sender, System.EventArgs e)
        {
            Uri target = ReadTargetUri();
            if (target == null)
            {
                return;
            }

            // "using" releases the client even when the download throws.
            using (WebClient wc = new WebClient())
            {
                // NOTE(review): this sends the credentials of the account the page runs
                // under to a host chosen by the user. Kept from the original sample;
                // confirm that this is really wanted outside an intranet.
                wc.Credentials = CredentialCache.DefaultCredentials;

                // Approach 1: fetch the raw bytes and decode them.
                byte[] pageData = wc.DownloadData(target.AbsoluteUri);
                ContentHtml.Text = Encoding.Default.GetString(pageData);

                // Approach 2 (equivalent): read the response as a stream.
                //   using (Stream resStream = wc.OpenRead(target.AbsoluteUri))
                //   using (StreamReader sr = new StreamReader(resStream, Encoding.Default))
                //   {
                //       ContentHtml.Text = sr.ReadToEnd();
                //   }
            }
        }

        /// <summary>
        /// Downloads the target URL with <see cref="WebRequest"/> and displays the
        /// decoded content.
        /// </summary>
        private void WebRequestButton_Click(object sender, System.EventArgs e)
        {
            Uri target = ReadTargetUri();
            if (target == null)
            {
                return;
            }

            ContentHtml.Text = DownloadText(target);
        }

        /// <summary>
        /// Downloads the target URL, removes everything that looks like a markup tag
        /// and collapses whitespace, then displays the remaining text.
        /// </summary>
        private void GetText_Click(object sender, System.EventArgs e)
        {
            Uri target = ReadTargetUri();
            if (target == null)
            {
                return;
            }

            string html = DownloadText(target);

            // Drop every "<...>" run, i.e. anything shaped like a tag.
            string text = Regex.Replace(html, "<[^>]*>", "");

            // Collapse each run of whitespace into a single space.
            ContentHtml.Text = Regex.Replace(text, "\\s+", " ");
        }

        /// <summary>
        /// Reads the URL from <c>UrlText</c> and validates it.
        /// </summary>
        /// <returns>
        /// The parsed absolute http/https URI, or <c>null</c> when the input is empty,
        /// malformed or uses another scheme. In the <c>null</c> case a message has
        /// already been written to <c>ContentHtml</c>.
        /// </returns>
        private Uri ReadTargetUri()
        {
            string text = UrlText.Text.Trim();
            Uri target = null;

            if (text.Length > 0)
            {
                // Uri.TryCreate is not available on the .NET 1.x runtime this sample
                // targets, so a malformed address is detected through the exception.
                try
                {
                    target = new Uri(text);
                }
                catch (UriFormatException)
                {
                    target = null;
                }
            }

            // The address comes straight from the browser. WebRequest.Create would also
            // accept schemes such as file://, which would let a visitor read files
            // from the server, so only web addresses are allowed.
            if (target == null
                || (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps))
            {
                ContentHtml.Text = "请输入以 http:// 或 https:// 开头的有效网址。";
                return null;
            }

            return target;
        }

        /// <summary>
        /// Fetches <paramref name="target"/> with <see cref="WebRequest"/> and returns
        /// the response body decoded with <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="target">Absolute URI to download.</param>
        /// <returns>The complete response body as text.</returns>
        private static string DownloadText(Uri target)
        {
            WebRequest request = WebRequest.Create(target);

            // Response, stream and reader are all released on success and on failure.
            using (WebResponse response = request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(resStream, Encoding.Default))
            {
                return sr.ReadToEnd();
            }
        }
    }
}