Please Help with an asp.net c# webscrape

Oct 31 2008 10:31 PM

Ok, I am working on a webscrape for a friend/customer. I have tried many different approaches, using fiddler to analyze what is going on. But I cannot seem to get it to work. I was hoping that someone with more experience than I have could give it a look and see what I am doing wrong. i am new here, so if I put this code in wrong, please forgive me. My goal here is to scrape this site with a post that it requires, then take the data and format it to print. I havent gotten to the formatting part so lets not worry about that. I am really at my wits end here, I have really tried before coming to ask for help, but I am new to asp.net and would be very grateful for some help.But, anyway,   I am still trying to get data back, here is the code....

default.aspx ( web form ) ************************************************

<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" %>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" >

<head id="Head1" runat="server">

<title>Pinellas County Sheriff's Office Arrest Inquiry</title>

</head>

<script type="text/javascript">

<!--

var theForm = document.forms['Form1'];

if (!theForm) {

theForm = document.Form1;

}

function __doPostBack(eventTarget, eventArgument) {

if (!theForm.onsubmit || (theForm.onsubmit() != false)) {

theForm.__EVENTTARGET.value = eventTarget;

theForm.__EVENTARGUMENT.value = eventArgument;

theForm.submit();

}

}

// -->

</script>

<body>

<form runat="server" ID="Form1" action="http://www.pcsoweb.com/InmateBooking/Inquiry.aspx">

<asp:Textbox id="txtLastName" runat="server" style="width:5px;" ></asp:TextBox>

<asp:Textbox id="txtFirstName" runat="server" style="width:5px;" ></asp:TextBox>

<asp:Textbox id="txtDOB" runat="server" style="width:5px;" ></asp:TextBox>

<asp:TextBox id="txtBookingDate" runat="server" text="10/22/2008"></asp:TextBox>

<asp:TextBox id="drpPageSize" runat="server" text="15"></asp:TextBox>

<asp:TextBox id="drpSortBy" runat="server" text="Name"></asp:TextBox>

<asp:TextBox id="drpRace" runat="server" text="any"></asp:TextBox>

<asp:TextBox id="drpSex" runat="server" text="any"></asp:TextBox>

<asp:TextBox id="drpAgencies" runat="server" text=""></asp:TextBox>

<asp:TextBox runat="server" id="WebResponseText" Width="780" Height="300" TextMode="MultiLine" />

<asp:Textbox runat="server" id="txtAddress" style="width:5px;"></asp:TextBox>

<asp:Textbox runat="server" id="txtDocketNumber" style="width:5px;" ></asp:TextBox>

<asp:Button runat="server" id="btnBookingDate" text="Go" OnClick="SubmitButton_Click"/><br/>

<asp:Label runat="server" id="WebResponseLabel" /><br />

<asp:HiddenField runat="server" id="__LASTFOCUS" value="" />

<asp:HiddenField runat="server" id="__EVENTARGUMENT" value="" />

<asp:HiddenField runat="server" id="__EVENTTARGET" value="" />

<asp:Checkbox runat="server" id="chkIncludeCharges"></asp:Checkbox>

<asp:Button runat="server" type="submit" name="btnDocketNumber" value="Go" id="btnDocketNumber" />

<asp:Button type="submit" value="Go" id="btnAddressSearch" />

<div id="lblErrorMessage" style="color:Red;"></div>

<div id="lblNameMessage" style="color:Red;"></div>

<div id="lblDateMessage" style="color:Red;"></div>

<div id="lblDocketMessage" style="color:Red;"></div>

<div id="lblAddress" style="color:Red;"></div>

<div id="page_view" runat="server">

</div>

</form>

</body>

</html>

 default.aspx.cs ( code ) ************************************************

using System;

using System.Collections;

using System.Configuration;

using System.Data;

using System.IO;

using System.Linq;

using System.Net;

using System.Text;

using System.Text.RegularExpressions;

using System.Web;

using System.Web.Security;

using System.Web.UI;

using System.Web.UI.HtmlControls;

using System.Web.UI.WebControls;

using System.Web.UI.WebControls.WebParts;

using System.Xml.Linq;

public partial class _Default : System.Web.UI.Page

{

protected void Page_Load(object sender, EventArgs e)

{

}

public void SubmitButton_Click(System.Object sender, System.EventArgs e)

{

StreamReader sr = null;

HttpWebResponse WebResponseObject = null;

System.IO.Stream myRequestStream = null;

//string stringPost = Request.Form.

string stringPost = "";

byte[] buffer = Encoding.UTF8.GetBytes(stringPost);

string RequestUrl = "http://www.pcsoweb.com/InmateBooking/Inquiry.aspx";

//ATTACH URL TO REQUEST OBJECT

HttpWebRequest WebRequestObject = WebRequest.Create(RequestUrl) as HttpWebRequest;

 

//SET HEADERS

WebRequestObject.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-silverlight, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, */*";

WebRequestObject.Referer = "http://www.pcsoweb.com/InmateBooking/Inquiry.aspx";

WebRequestObject.Headers.Add("Accept-Language: en-us");

WebRequestObject.ContentType = "application/x-www-form-urlencoded";

WebRequestObject.Headers.Add("Accept-Encoding:gzip, deflate");

WebRequestObject.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.1; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

//WebRequestObject.Headers.Add("Host:www.pcsoweb.com");

WebRequestObject.ContentLength = (string.IsNullOrEmpty(stringPost)) ? 0 : stringPost.Length;

WebRequestObject.KeepAlive = true;

WebRequestObject.Headers.Add("Pragma:no-cache");

//WebRequestObject.Accept = "Accept-Encoding: gzip, deflate

Cookie c = new Cookie();

c.Name = "ASP.NET_SessionId";

c.Value = Session.SessionID;

c.Domain = "http://www.pcsoweb.com";

WebRequestObject.CookieContainer = new CookieContainer();

WebRequestObject.CookieContainer.Add(c);

try

{

//WebRequestObject = (HttpWebRequest)WebRequest.Create(RequestUrl);

//SET REQUEST METHOD

WebRequestObject.Method = WebRequestMethods.Http.Post;

try

{

myRequestStream = WebRequestObject.GetRequestStream();

}

catch (ProtocolViolationException ex)

{

System.Diagnostics.Debug.WriteLine("Exception;" + System.Environment.NewLine +

ex.Message + System.Environment.NewLine +

ex.StackTrace + System.Environment.NewLine +

ex.Source + System.Environment.NewLine);

}

//Send Request Now

System.IO.StreamWriter streamWriter = new System.IO.StreamWriter(myRequestStream);

streamWriter.Write(stringPost, 0, stringPost.Length);

streamWriter.Flush();

streamWriter.Close();

try

{

WebResponseObject = (HttpWebResponse)WebRequestObject.GetResponse();

}

catch (ProtocolViolationException ex)

{

System.Diagnostics.Debug.WriteLine("Exception;" + System.Environment.NewLine +

ex.Message + System.Environment.NewLine +

ex.StackTrace + System.Environment.NewLine +

ex.Source + System.Environment.NewLine);

}

sr = new StreamReader(WebResponseObject.GetResponseStream());

String Results = sr.ReadToEnd();

this.WebResponseLabel.Text = Results;

this.WebResponseText.Text = Results;

}

finally

{

try

{

sr.Close();

}

catch { }

try

{

WebResponseObject.Close();

WebRequestObject.Abort();

 }

catch { }

        }

     }

}


Answers (1)