The Configuration class holds the configuration variables and methods of the application.
/// <summary>
/// Central place for the tunable values of the application: pause timing,
/// exploration depth and the "user-agent" strings.
/// </summary>
public class Configuration
{
    /// <summary>Number of entries generated for one pause sequence.</summary>
    public int sequenceLen = 10000;

    /// <summary>Base value, in seconds, multiplied by a random factor to get a long pause.</summary>
    public int bigPauseSeconds = 9;

    /// <summary>Base value, in seconds, multiplied by a random factor to get a short pause.</summary>
    public int littlePauseSeconds = 2;

    /// <summary>Exclusive upper bound used when drawing how often a long pause occurs.</summary>
    public int frequencyBigPauseMax = 21;

    /// <summary>Inclusive lower bound used when drawing how often a long pause occurs.</summary>
    public int frequencyBigPauseMin = 5;

    /// <summary>Maximum exploration depth; copied into Navigation.deepNodeLimit.</summary>
    public int deepBrowsing = 3;

    /// <summary>Known "user-agent" header values (only for WebClient class usage).</summary>
    public List<string> browserIdentities = new List<string>();

    /// <summary>
    /// Creates a configuration with the default values and the built-in
    /// list of browser identities.
    /// </summary>
    public Configuration()
    {
        LoadBrowserIdentities();
    }

    /// <summary>
    /// Fills <see cref="browserIdentities"/>; add further "user-agent" HTTP
    /// values here.
    /// </summary>
    private void LoadBrowserIdentities()
    {
        const string internetExplorer6 =
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)";

        browserIdentities.Add(internetExplorer6);
    }
}
LoadBrowserIdentities loads the "user-agent" values for multiple browsers (at this time there is only one :-) ).
The Util class is a utility class: it contains helper methods used by the entire application.
/// <summary>
/// Helper methods used across the application: string encoding and
/// generation of the randomized pause sequence.
/// </summary>
public class Util
{
    // A single generator and a single settings object are shared by all
    // calculations of this instance. Creating a new clock-seeded Random for
    // every pause (as was done before) yields identical values on .NET
    // Framework when the instances are created in quick succession, which
    // made the whole sequence effectively constant.
    private readonly Random random = new Random();
    private readonly Configuration config = new Configuration();

    public Util()
    {
    }

    /// <summary>
    /// Encodes <paramref name="input"/> with HttpUtility.UrlEncode.
    /// </summary>
    /// <param name="input">Text to encode.</param>
    /// <returns>The URL-encoded text.</returns>
    // NOTE(review): despite its name this method performs URL encoding, not
    // HTML encoding. Behavior is left unchanged because callers may rely on
    // it; confirm which encoding is actually intended.
    public string HtmlEncode(string input)
    {
        return HttpUtility.UrlEncode(input);
    }

    /// <summary>
    /// Builds a list of Configuration.sequenceLen pause lengths (seconds).
    /// Every N-th entry, starting with the first, is a long pause and all
    /// others are short pauses; N is drawn once per sequence from
    /// [frequencyBigPauseMin, frequencyBigPauseMax).
    /// </summary>
    /// <returns>The generated pause sequence; empty when sequenceLen is not positive.</returns>
    public List<int> CreatePauseSequence()
    {
        int length = config.sequenceLen;
        List<int> sequencePauseList = new List<int>(Math.Max(length, 0));

        int bigPauseRndFrequency = random.Next(config.frequencyBigPauseMin, config.frequencyBigPauseMax);
        if (bigPauseRndFrequency < 1)
        {
            // A frequency of zero would make the modulo below throw.
            bigPauseRndFrequency = 1;
        }

        for (int position = 0; position < length; position++)
        {
            if ((position % bigPauseRndFrequency) == 0)
            {
                sequencePauseList.Add(this.BigPauseCalculator());
            }
            else
            {
                sequencePauseList.Add(this.LittlePauseCalculator());
            }
        }

        return sequencePauseList;
    }

    /// <summary>
    /// Returns a long pause: bigPauseSeconds multiplied by a random factor 0..9.
    /// </summary>
    private int BigPauseCalculator()
    {
        return config.bigPauseSeconds * random.Next(10);
    }

    /// <summary>
    /// Returns a short pause: littlePauseSeconds multiplied by a random factor 0..9.
    /// </summary>
    private int LittlePauseCalculator()
    {
        return config.littlePauseSeconds * random.Next(10);
    }
}
The NodeResource class is a node of the navigation process; it extends the Node class.
/// <summary>
/// Empty base type for the nodes of the navigation process.
/// </summary>
public class Node
{
    // No members of its own; the implicit public parameterless constructor
    // is all that derived types need.
}
/// <summary>
/// One resource (URI) met during navigation, together with what was
/// downloaded for it. All members start with their default values.
/// </summary>
public class NodeResource : Node
{
    /// <summary>Depth of this node; a child gets its parent's depth plus one.</summary>
    public int deepNode;

    /// <summary>Address of the resource.</summary>
    public string uri;

    /// <summary>True once the resource has been loaded by the browser.</summary>
    public bool visited;

    /// <summary>Document object of the loaded page.</summary>
    public HtmlDocument currDocument;

    /// <summary>Text of the loaded page.</summary>
    public string strCurrDocument;

    // presumably the keyword search results for this resource; nothing in the visible code fills it
    public List<KeywordsResult> listK;

    // presumably the regular-expression results for this resource; nothing in the visible code fills it
    public List<RegExResult> listR;

    // presumably the raw bytes of a non-HTML resource; nothing in the visible code fills it
    public byte[] binaryResource;
}
The Navigation class represents the navigation process.
/// <summary>
/// State of one navigation run: the nodes collected so far and the maximum
/// depth up to which links are followed.
/// </summary>
public class Navigation
{
    // presumably the keywords to search for in visited resources; unused in the visible code
    public List<string> keywords = new List<string>();

    // presumably the regular expressions to apply to visited resources; unused in the visible code
    public List<string> regularExpression = new List<string>();

    /// <summary>Every node known to this run, visited or not.</summary>
    public List<NodeResource> listNodes = new List<NodeResource>();

    /// <summary>Depth limit; taken from Configuration.deepBrowsing at construction.</summary>
    public int deepNodeLimit = 0;

    /// <summary>
    /// Creates an empty navigation whose depth limit comes from a fresh
    /// default <see cref="Configuration"/>.
    /// </summary>
    public Navigation()
    {
        Configuration defaults = new Configuration();
        deepNodeLimit = defaults.deepBrowsing;
    }
}
The RegExResult class stores the results of the regular expressions extracted from the URI data.
/// <summary>
/// Result holder for one regular expression: the expression text and the
/// list of matches. Both members are null until a caller assigns them.
/// </summary>
public class RegExResult
{
    /// <summary>Text of the regular expression.</summary>
    public string regExtext;

    /// <summary>Matches found for the expression.</summary>
    public List<string> matchList;
}
How it works:
Here is the code-behind of a Windows form.
First, the Load method is called when the form is loaded;
it creates a timer, initially disabled. When the button on the Windows form is clicked,
the Navigation class is instantiated and the timer's "Enabled" property is set to true. The timer runs, and when it elapses
its "Enabled" property is first set to false, and then the Navigate method of the WebBrowser control is called. When the DocumentCompleted event of the WebBrowser control fires, the NodeResource object receives the embedded HtmlDocument. Everything works by enabling and disabling the timer. I chose this solution to emulate a human user, and for this reason the software has a DoSleep() function: it stops the current thread for a pause; the pause is short or long, and its value is random between a minimum and a maximum.
In the Configuration class there is a depth value (deepBrowsing): it is the maximum depth of exploration.
/// <summary>
/// Code-behind of the main form. Drives a simple crawler on top of the
/// WebBrowser control: a timer picks the next unvisited node and navigates
/// to it; when the document has loaded, its links are queued as new nodes,
/// a random pause is taken and the timer is re-armed.
/// </summary>
public partial class Form1 : Form
{
    // Pause lengths (seconds) still to be consumed by DoSleep; refilled when empty.
    List<int> calculatedSleepInterval = null;
    List<HtmlElement> links = new List<HtmlElement>();
    List<HtmlDocument> listHtmlDocs = new List<HtmlDocument>();
    // Current navigation run; created on the first click of the start button.
    public Navigation nav = null;
    System.Timers.Timer timer = null;
    // Node the browser is currently loading (or has just loaded).
    NodeResource currNodeResource = null;

    public Form1()
    {
        InitializeComponent();
    }

    private void button6_Click(object sender, EventArgs e)
    {
        //add here result speech processor...
        //add <br> and "." parse consideration introducing pauses during reading process
    }

    /// <summary>
    /// Prepares the (disabled) timer, the first pause sequence and the
    /// DocumentCompleted subscription.
    /// </summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        TimerCreation();
        CalculateSequencePause();
        this.webBrowser1.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(webBrowser1_DocumentCompleted);
    }

    /// <summary>
    /// Creates the timer in the disabled state with a 2 second interval.
    /// </summary>
    private void TimerCreation()
    {
        timer = new System.Timers.Timer();
        // Elapsed is raised on a thread-pool thread by default, but the handler
        // drives the WebBrowser control; marshal the event onto the UI thread.
        timer.SynchronizingObject = this;
        timer.AutoReset = true;
        timer.Enabled = false;
        timer.Interval = 2000;
        timer.Elapsed += new ElapsedEventHandler(timer_Elapsed);
    }

    /// <summary>
    /// Navigates to the first unvisited node. The timer is stopped here and
    /// is only re-enabled by webBrowser1_DocumentCompleted; when no node is
    /// left to visit it simply stays stopped.
    /// </summary>
    /// <param name="sender">The timer.</param>
    /// <param name="e">Event data (unused).</param>
    void timer_Elapsed(object sender, ElapsedEventArgs e)
    {
        // Stop first so that no further tick arrives while a page is loading.
        this.timer.Enabled = false;

        if (this.nav == null)
        {
            return;
        }

        for (int index = 0; index < this.nav.listNodes.Count; index++)
        {
            NodeResource nr = this.nav.listNodes[index];
            if (nr.visited)
            {
                continue;
            }

            Uri target;
            if (!Uri.TryCreate(nr.uri, UriKind.Absolute, out target))
            {
                // Empty or malformed address: it can never be loaded, so mark
                // it as done instead of throwing and try the next node.
                nr.visited = true;
                continue;
            }

            //here is unique point for calling Navigate() method
            this.currNodeResource = nr;
            this.webBrowser1.Navigate(target);
            return;
        }
    }

    /// <summary>
    /// Generates a fresh pause sequence and stores it for DoSleep.
    /// </summary>
    private void CalculateSequencePause()
    {
        Util u = new Util();
        this.calculatedSleepInterval = u.CreatePauseSequence();
    }

    /// <summary>
    /// Stores the loaded document in the current node, queues the page's
    /// links as child nodes (while below the depth limit), pauses and then
    /// re-enables the timer.
    /// </summary>
    /// <param name="sender">The WebBrowser control.</param>
    /// <param name="e">Event data (unused).</param>
    void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        if (this.nav == null || this.currNodeResource == null)
        {
            // A document finished loading before any navigation run was started.
            return;
        }

        WebBrowser w = (WebBrowser)sender;
        string currentUri = w.Url.ToString();

        //set currNode
        currNodeResource.uri = currentUri;
        currNodeResource.currDocument = w.Document;
        currNodeResource.visited = true;
        currNodeResource.strCurrDocument = w.DocumentText;

        //replace the first node with this uri in list nodes
        for (int index = 0; index < this.nav.listNodes.Count; index++)
        {
            if (string.Equals(this.nav.listNodes[index].uri, currentUri))
            {
                this.nav.listNodes[index] = currNodeResource;
                break;
            }
        }

        //here get sub nodes
        if (currNodeResource.deepNode < nav.deepNodeLimit && w.Document != null)
        {
            //create and append a node for each link of the page
            foreach (HtmlElement elem in w.Document.Links)
            {
                string href = elem.GetAttribute("href");
                if (string.IsNullOrEmpty(href))
                {
                    continue;
                }

                NodeResource child = new NodeResource();
                child.deepNode = currNodeResource.deepNode + 1;
                child.uri = href;
                this.nav.listNodes.Add(child);
            }
        }

        //here re-enable timer object
        DoSleep();
        this.timer.Enabled = true;
    }

    /// <summary>
    /// Starts navigation from the address typed into textBoxUrl.
    /// </summary>
    private void button8_Click(object sender, EventArgs e)
    {
        //start navigation
        if (this.nav == null)
        {
            // Must be stored in the field: the code below and the timer use this.nav.
            this.nav = new Navigation();
        }
        // else: nodes of a previous navigation are kept.

        //single node creation
        NodeResource node = new NodeResource();
        node.uri = this.textBoxUrl.Text;
        node.deepNode = 1;
        this.currNodeResource = node;

        //append node in navigation
        this.nav.listNodes.Add(node);
        this.timer.Enabled = true;
    }

    /// <summary>
    /// Blocks the current thread for the next pause of the sequence,
    /// generating a new sequence when the current one is used up.
    /// </summary>
    // NOTE(review): this is called from a UI event handler, so the form does
    // not respond during the pause. That matches the described "single human
    // user" design but should be confirmed as intended.
    private void DoSleep()
    {
        if (this.calculatedSleepInterval == null || this.calculatedSleepInterval.Count < 1)
        {
            this.CalculateSequencePause();
        }

        if (this.calculatedSleepInterval.Count < 1)
        {
            // Sequence generation produced nothing (sequenceLen <= 0): no pause.
            return;
        }

        int pause = this.calculatedSleepInterval[0];
        this.calculatedSleepInterval.RemoveAt(0);
        Thread.Sleep(pause * 1000);
    }
}
The application will read the results aloud using a speech synthesizer.
At this time I am looking for a stand-alone synonym dictionary, to add correlation to the basic, plain keyword search. I love multithreading techniques, but this application emulates a human user, and a human user has a single conscious process... or not?
The code is not tested and not compiled...
0 commenti:
Post a Comment