一网络信息浏览

1 HTTP协议

2 客户端与服务器

3 Request与Response

4 Stream

5 Get与Post

在这里插入图片描述

二一些查看工具

1 Fiddler2

http://www.fiddler2.com

2 其他工具

如 Network Monitor、Visual Sniffer、HttpWatch、Wireshark

3 Chrome/Firefox等浏览器F12

① Chrome 中按F12，或点右键，“审查元素”;
② FireFox中安装FireBug;

三使用System.Web

① System.Web提供支持浏览器/服务器通讯的类和接口。
② 此命名空间包括提供有关当前HTTP请求的大量信息的HttpRequest类、管理HTTP到客户端的输出的HttpResponse类，以及提供对服务器端实用工具和进程的访问的HttpServerUtility类。
③ System.Web还包括用于Cookie操作、文件传输、异常信息和输出缓存控制的类。

四 WebClient类

1 DownloadData及DownloadFile

① 后来又有DownloadString;
② UploadData及UploadFile;
③ OpenRead及OpenWrite;

2 示例：WebClientDownload.cs

① string url=@"http://www.baidu.com";
② WebClient client=new WebClient();
③ byte[] pageData=client.DownloadData(url);
④ string pageHtml=Encoding.Default.GetString(pageData);
⑤ Console.WriteLine(pageHtml);

3 WebRequest及WebResponse

① WebRequest myRequest=
WebRequest.Create("http://www.contoso.com");
② WebResponse myResponse=myRequest.GetResponse();
③ Stream requestStream =myRequest.GetRequestStream();
④ Stream receiveStream=myResponse.GetResponseStream();

using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace 使用WebClient
{
    internal class Program
    {
        /// <summary>
        /// Downloads a fixed test page with <see cref="WebClient"/>, decodes the bytes
        /// and prints the resulting text to the console, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string url = @"https://localhost/test.html";

            byte[] pageData;
            // WebClient implements IDisposable; release it as soon as the download is done.
            using (WebClient client = new WebClient())
            {
                pageData = client.DownloadData(url);
            }

            // NOTE(review): Encoding.Default is the platform default encoding, so a page
            // served in a different encoding will presumably print garbled — confirm intent.
            string pageHtml = Encoding.Default.GetString(pageData);
            Console.WriteLine(pageHtml);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace DownloadString
{
    internal class Program
    {
        /// <summary>
        /// Entry point: downloads the page named by the first command-line argument
        /// (or a built-in default when no argument is given) and prints it.
        /// </summary>
        /// <param name="args">Optional; <c>args[0]</c> overrides the default URL.</param>
        static void Main(string[] args)
        {
            // Fall back to the default address when no URL was supplied.
            string target = args.Length != 0 ? args[0] : "https://www.baidu.com";

            string pageText = DownloadString(target);
            Console.WriteLine(pageText);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads the resource at <paramref name="url"/> over HTTP/HTTPS and returns
        /// its body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Absolute HTTP or HTTPS URL to fetch.</param>
        /// <returns>
        /// The response body, or an empty string when the URL is malformed, is not an
        /// HTTP/HTTPS URL, or the request fails with a <see cref="WebException"/>
        /// (a message is written to the console in those cases).
        /// </returns>
        public static string DownloadString(string url)
        {
            try
            {
                // WebRequest.Create returns a non-HTTP request type for schemes such as
                // file:// or ftp://, in which case the "as" cast yields null.
                HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
                if (request == null)
                {
                    Console.WriteLine("Only HTTP/HTTPS URLs are supported: " + url);
                    return "";
                }

                request.Credentials = CredentialCache.DefaultCredentials;

                // using-blocks guarantee the response, stream and reader are released
                // even when reading the body throws.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (UriFormatException exception)
            {
                Console.WriteLine(exception.Message);
                Console.WriteLine("Invalid URL format.Please use https://www.yoursite.com");
            }
            catch (WebException exception2)
            {
                Console.WriteLine(exception2.Message);
            }
            return "";
        }
    }
}

using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace 猜字符的编码
{
    internal class Program
    {
        /// <summary>
        /// Entry point: downloads a page (default address, or <c>args[0]</c> when given),
        /// prints the decoded text and waits for a key press.
        /// </summary>
        /// <param name="args">Optional; <c>args[0]</c> overrides the default URL.</param>
        static void Main(string[] args)
        {
            bool urlSupplied = args.Length != 0;
            string address = urlSupplied ? args[0] : "https://www.pku.edu.cn";

            Console.WriteLine(DownloadString(address));

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and decodes it using the charset
        /// announced in the HTTP headers; when the headers give none, the bytes are
        /// decoded as UTF-8 and then re-decoded with the charset declared in the
        /// document's <c>&lt;meta&gt;</c> tag or XML declaration, if one is found.
        /// </summary>
        /// <param name="url">Absolute HTTP or HTTPS URL to fetch.</param>
        /// <returns>
        /// The decoded page text, or an empty string when the URL is malformed, is not
        /// HTTP/HTTPS, or the request fails with a <see cref="WebException"/>.
        /// </returns>
        public static string DownloadString(string url)
        {
            string html = "";

            try
            {
                // Non-HTTP schemes (file://, ftp://) produce a different request type,
                // so the "as" cast yields null; report it instead of dereferencing null.
                HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
                if (request == null)
                {
                    Console.WriteLine("Only HTTP/HTTPS URLs are supported: " + url);
                    return html;
                }

                request.Credentials = CredentialCache.DefaultCredentials;

                // using-blocks release the response and stream even if decoding throws.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                {
                    // Prefer the encoding announced by the server.
                    Encoding encoding = GuessDownloadEncoding(response);
                    if (encoding != null)
                    {
                        using (StreamReader reader = new StreamReader(responseStream, encoding))
                        {
                            html = reader.ReadToEnd();
                        }
                    }
                    else
                    {
                        // No usable header charset: decode provisionally as UTF-8, then
                        // look inside the document for a declared charset.
                        byte[] htmlByte = GetByteContent(responseStream);
                        html = Encoding.UTF8.GetString(htmlByte);

                        // The optional quote after "charset=" lets the HTML5 form
                        // <meta charset="gbk"> capture "gbk" rather than an empty string.
                        string reg_charset = "(<meta[^>]*charset\\s*=\\s*[\"']?(?<charset>[^>'\"\\s/;]*)[\\s\\S]*?>)|(xml[^>]+encoding=(\"|')*(?<charset>[^>'\"]*)[\\s\\S]*?>)";

                        Match m = Regex.Match(html, reg_charset, RegexOptions.IgnoreCase);
                        string encodingName = m.Success ? m.Groups["charset"].Value.Trim() : "";
                        Console.WriteLine(encodingName);
                        if (encodingName != "")
                        {
                            try
                            {
                                html = Encoding.GetEncoding(encodingName).GetString(htmlByte);
                            }
                            catch (ArgumentException)
                            {
                                // Unknown or unsupported charset name: keep the UTF-8 text.
                            }
                        }
                    }
                }
            }
            catch (UriFormatException exception)
            {
                Console.WriteLine(exception.Message);
            }
            catch (WebException exception2)
            {
                Console.WriteLine(exception2.Message);
            }
            return html;
        }

        /// <summary>
        /// Reads <paramref name="stream"/> from its current position to the end and
        /// returns everything read as a byte array.
        /// </summary>
        /// <param name="stream">Readable stream to drain; it is not closed here.</param>
        /// <returns>The bytes read; an empty array when the stream has no more data.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        private static byte[] GetByteContent(Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            // MemoryStream grows as needed and avoids boxing each byte into an ArrayList.
            using (MemoryStream collected = new MemoryStream())
            {
                stream.CopyTo(collected);
                return collected.ToArray();
            }
        }

        /// <summary>
        /// Determines the text encoding announced by the response's Content-Type.
        /// </summary>
        /// <param name="response">The HTTP response whose Content-Type is inspected.</param>
        /// <returns>
        /// The matching <see cref="Encoding"/>, or <c>null</c> when no charset is
        /// announced or the announced name is not a known encoding.
        /// </returns>
        private static Encoding GuessDownloadEncoding(HttpWebResponse response)
        {
            string charset = GetCharSet(response.ContentType);
            if (charset == "")
            {
                // Fall back to the raw header value.
                charset = GetCharSet(response.Headers["Content-Type"]);
            }

            if (charset == "")
            {
                return null;
            }

            try
            {
                return Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                // Encoding.GetEncoding signals an unknown/unsupported name with
                // ArgumentException; treat that as "no usable charset". Other
                // exceptions are no longer silently swallowed.
                return null;
            }
        }

        /// <summary>
        /// Extracts the <c>charset</c> parameter from a Content-Type header value.
        /// </summary>
        /// <param name="contentType">
        /// Header value such as <c>text/html; charset=utf-8</c>; may be null or empty.
        /// </param>
        /// <returns>
        /// The charset name in lower case with surrounding quotes removed, or an empty
        /// string when the value is null/empty or carries no charset parameter.
        /// </returns>
        private static string GetCharSet(string contentType)
        {
            Console.WriteLine("contentType:" + contentType);

            if (string.IsNullOrEmpty(contentType))
            {
                return "";
            }

            // ';' separates header parameters, so it must be a delimiter too; dropping
            // empty entries copes with "charset = utf-8" and "text/html;charset=utf-8".
            // ToLowerInvariant keeps "CHARSET" matching under any current culture.
            char[] separators = new char[] { ',', ';', '=', ' ', '\t' };
            string[] tokens = contentType.ToLowerInvariant()
                                         .Split(separators, StringSplitOptions.RemoveEmptyEntries);

            bool sawCharsetKey = false;
            foreach (string token in tokens)
            {
                if (token == "charset")
                {
                    sawCharsetKey = true;
                }
                else if (sawCharsetKey)
                {
                    // The token after the key is the value; it may be quoted.
                    return token.Trim('"', '\'');
                }
            }
            return "";
        }
    }
}

using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace 下载网页中的所有图片
{
    internal class Program
    {
        /// <summary>
        /// Downloads a web page, finds every image/flash reference in its markup with a
        /// regular expression, and saves each referenced file into the current directory.
        /// </summary>
        /// <param name="args">Optional; <c>args[0]</c> overrides the default page URL.</param>
        static void Main(string[] args)
        {
            string pattern = @"(<)(.[^<]*)((src|href)=)('|""| )?(?<fileUrl>(.[^'|\s|""]*)(\.)(jpg|gif|png|bmp|jpeg|swf))('|""|\s|>)(.[^>]*)(>)";
            string pageUrl = @"https://www.pku.edu.cn/";

            if (args != null && args.Length > 0)
            {
                pageUrl = args[0];
            }

            pageUrl = pageUrl.Replace('\\', '/');

            // Let System.Uri do the base-URL handling: manual slash counting throws for
            // URLs without a path (e.g. "https://host") and mis-resolves "//cdn/..." links.
            Uri pageUri;
            if (!Uri.TryCreate(pageUrl, UriKind.Absolute, out pageUri))
            {
                Console.WriteLine("Invalid page URL: " + pageUrl);
                Console.ReadKey();
                return;
            }

            string pageContent = DownOnePage(pageUrl);
            Console.WriteLine(pageContent);

            Regex rx = new Regex(pattern, RegexOptions.IgnoreCase);

            MatchCollection mc = rx.Matches(pageContent);
            Console.WriteLine("有{0}次匹配", mc.Count);
            foreach (Match mt in mc)
            {
                string fileUrl = mt.Groups["fileUrl"].Value.Replace('\\', '/');

                // Resolves relative, root-relative and protocol-relative references
                // against the page URL; absolute references pass through unchanged.
                Uri fileUri;
                if (!Uri.TryCreate(pageUri, fileUrl, out fileUri))
                {
                    Console.WriteLine("Skipping unresolvable URL: " + fileUrl);
                    continue;
                }

                Console.WriteLine(fileUri.AbsoluteUri);

                // Local file name = last path segment of the resolved URL.
                string path = fileUri.AbsolutePath;
                string fileName = Uri.UnescapeDataString(path.Substring(path.LastIndexOf('/') + 1));
                if (fileName.Length == 0)
                {
                    continue;
                }

                try
                {
                    DownOneFile(fileUri.AbsoluteUri, fileName);
                }
                catch (WebException ex)
                {
                    // One broken link should not abort the remaining downloads.
                    Console.WriteLine("Download failed: " + ex.Message);
                }
            }

            // Pause once at the end (also when nothing matched) instead of after every file.
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads the resource at <paramref name="url"/> and saves it as
        /// <paramref name="fileName"/>.
        /// </summary>
        /// <param name="url">Address of the resource to download.</param>
        /// <param name="fileName">Path of the local file to write.</param>
        static void DownOneFile(string url,string fileName)
        {
            // WebClient implements IDisposable; dispose it once the transfer completes.
            using (WebClient client = new WebClient())
            {
                client.DownloadFile(url, fileName);
            }
        }

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and returns its content decoded
        /// as UTF-8.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The page content as text.</returns>
        static string DownOnePage(string url)
        {
            // WebClient implements IDisposable; dispose it once the download completes.
            using (WebClient client = new WebClient())
            {
                byte[] pageData = client.DownloadData(url);
                return Encoding.UTF8.GetString(pageData);
            }
        }
    }
}