
How to fix "xrange() arg 3 must not be zero" error in Python using parallel programming?
    <pre><code>import time from multiprocessing import Process, Pool import sys, os, inspect import urllib import re index ={} graph={} # Common words that we don't want to be part of the index g=['is','a','the','ga','all','to','under'] def rm_tag(data): p = re.compile(r'&lt;.*?&gt;') return p.sub('', data) def get_page(url): try: f = urllib.urlopen(url) page = f.read() f.close() return page except: return "" def union(a,b): for e in b: if e not in a: a.append(e) def get_next_url(page): start_link=page.find("&lt;a href=") if(start_link==-1): return None,0 start_quote=page.find('"',start_link) end_quote=page.find('"',start_quote+1) url=page[start_quote+1:end_quote] return url,end_quote def get_all_links(page): links=[] while True: url,endpos=get_next_url(page) page=page[endpos:] if url: links.append(url) else: break print "get_all_links: %i links found\n" % len(links) graph[page]=[links] return graph[page] def add_to_index(index,url,keyword): if keyword in index: if url not in index[keyword]: index[keyword].append(url) return global g if keyword not in g: index[keyword]=[url] def add_page_to_index(index, url, content): words = content.split() for word in words: add_to_index(index, url,word) def compute_ranks(graph): d=0.8 numloops=20 ranks={} npages=len(graph) for page in graph: ranks[page]=1.0/npages for i in range(0,numloops): newranks={} for page in graph: newrank=(1-d)/npages for node in graph: if page in graph[node]: newrank=newrank + d * (ranks[node]/len(graph[node])) newranks[page]=newrank ranks=newranks return ranks def Look_up(index, keyword): if keyword in index: return index[keyword] else: return None def Look_up_new(index,ranks,keyword): pages=Look_up(index,keyword) if pages: for i in pages: print '\n Results with Page Ranks :\n'+i+" --&gt; "+str(ranks[i]) else: print "Keyword does not exist" def lucky_search(index, ranks, keyword): try: pages = Look_up(index, keyword) if pages: bestpage = pages[0] for candidate in pages: if ranks[candidate] &gt; ranks[bestpage]: bestpage = candidate return (bestpage, ranks[bestpage], pages) except: print "Keyword does not exist",sys.exc_info()[0] return None def print_profile(index, graph): print "*****************" print "Length of index", len(index) print "Length of graph", len(graph) i = 0 for e in index: i = i + 1 print i, ":", e if i &gt; 20: break print "*****************" def print_profile_top (index, ranks): max = 0 for e in index: link_count = len(index[e]) if link_count &gt; max: max = link_count # updating the highest no of links we have found so far print e, link_count, lucky_search(index,ranks,e)[1] print "*********************" #print "result for :", search_term, ": ", lucky_search(index,ranks,search_term) #print lucky_search(index,ranks,'limited') def chunks(l, n): for i in xrange(0,len(l),n): yield l[i:i+n] # instantly makes chunks a "generator function" instead of a normal function if __name__ == '__main__': start = time.clock() c = 3 seed= "http://www.python.org" keyword = "CGI" max_page = 20 tocrawl=[seed] crawled =[] print '\nCrawling using ' + seed + " as seed_page and search_term: " + keyword pool = Pool (c) while tocrawl: page=tocrawl.pop() if page not in crawled: max_page -= 1 if max_page&lt;=0: break content=get_page(page) text=content partitioned_text= list(chunks(text, len(text) / c)) links=pool.map(get_all_links, partitioned_text) #links=get_all_links(content) #content=rm_tag(content) add_page_to_index(index,page,content) for e in links: if e not in tocrawl: tocrawl.append(e) #union(tocrawl,graph[page]) 
crawled.append(page) #pool.terminate() ranks=compute_ranks(graph) print_profile(index, graph) print_profile_top(index, ranks) print "result for :", keyword, ": ", lucky_search(index,ranks,keyword) t=time.clock() - start print "Processing Time :",t #print crawled,index,graph """ </code></pre> <p>while running the code the following error shows. please help me to fix.</p> <pre><code>Traceback (most recent call last): File "C:\Documents and Settings\priyanka.14790\My Documents\Dropbox\Udacity\parallel.py", line 250, in &lt;module&gt; partitioned_text= list(chunks(text, len(text) / c)) File "C:\Documents and Settings\priyanka.14790\My Documents\Dropbox\Udacity\parallel.py", line 229, in chunks for i in xrange(0,len(l),n): ValueError: xrange() arg 3 must not be zero </code></pre> <hr> <p>Here is the code for normal search engine without any error</p> <pre><code> import sys, os, inspect import urllib import re max_page=5 # Common words that we don't want to be part of the index g=['is','a','the','ga','all','to','under'] def rm_tag(data): p = re.compile(r'&lt;.*?&gt;') return p.sub('', data) def get_page(url): try: f = urllib.urlopen(url) page = f.read() f.close() return page except: return "" def union(a,b): for e in b: if e not in a: a.append(e) def get_next_url(page): start_link=page.find("&lt;a href=") if(start_link==-1): return None,0 start_quote=page.find('"',start_link) end_quote=page.find('"',start_quote+1) url=page[start_quote+1:end_quote] return url,end_quote def get_all_links(page): links=[] while True: url,endpos=get_next_url(page) page=page[endpos:] if url: links.append(url) else: break return links def add_to_index(index,url,keyword): if keyword in index: if url not in index[keyword]: index[keyword].append(url) index[keyword]=[url] def add_page_to_index(index, url, content): words = content.split() for word in words: add_to_index(index, url,word) def compute_ranks(graph): d=0.8 numloops=20 ranks={} npages=len(graph) for page in graph: ranks[page]=1.0/npages for i in range(0,numloops): newranks={} for page in graph: newrank=(1-d)/npages for node in graph: if page in graph[node]: newrank=newrank + d * (ranks[node]/len(graph[node])) newranks[page]=newrank ranks=newranks return ranks def crawl_web(seed): tocrawl=[seed] crawled =[] index ={} graph={} global max_page while tocrawl: page=tocrawl.pop() if page not in crawled: max_page -= 1 if max_page&lt;=0: break c = get_page(page) graph[page]=get_all_links(c) c=rm_tag(c) add_page_to_index(index,page,c) union(tocrawl,graph[page]) crawled.append(page) return crawled,index,graph def Look_up(index, keyword): if keyword in index: return index[keyword] else: return None def lucky_search(index, ranks, keyword): try: pages = Look_up(index, keyword) if pages: bestpage = pages[0] for candidate in pages: if ranks[candidate] &gt; ranks[bestpage]: bestpage = candidate return (bestpage, ranks[bestpage], pages) except: print "Keyword does not exist" seed_page = "http://www.python.org" search_term = "Take" try: print "Enter the Max Limit to Search :" max_limit=int(raw_input()) except: max_page = 10 max_page = max_limit print '\nCrawling using ' + seed_page + " as seed_page and search_term: " + search_term crawled,index,graph=crawl_web(seed_page) ranks=compute_ranks(graph) def print_profile(index, graph): print "*****************" print "Length of index", len(index) print "Length of graph", len(graph) i = 0 for e in index: i = i + 1 print i, ":", e if i &gt; 20: break print "*****************" def print_profile_top (index, ranks): max1 = 0 for e in index: 
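For reference on where the zero comes from: chunks() is called as chunks(text, len(text) / c), and / is integer division in Python 2. So whenever the fetched page is shorter than c characters, and in particular when get_page() returns the empty string "" after a failed fetch, len(text) / c evaluates to 0 and xrange() receives a zero step. A minimal guarded sketch of chunks() (the max(1, n) clamp is my addition, not part of the original code):

<pre><code>def chunks(l, n):
    # Clamp the step so xrange() never receives 0, e.g. when
    # get_page() returned "" and len(text) / c evaluated to 0.
    n = max(1, n)
    for i in xrange(0, len(l), n):
        yield l[i:i+n]
</code></pre>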
Here is the code for the normal (non-parallel) search engine, which runs without any error:

<pre><code>import sys, os, inspect
import urllib
import re

max_page = 5

# Common words that we don't want to be part of the index
g = ['is', 'a', 'the', 'ga', 'all', 'to', 'under']

def rm_tag(data):
    p = re.compile(r'<.*?>')
    return p.sub('', data)

def get_page(url):
    try:
        f = urllib.urlopen(url)
        page = f.read()
        f.close()
        return page
    except:
        return ""

def union(a, b):
    for e in b:
        if e not in a:
            a.append(e)

def get_next_url(page):
    start_link = page.find('<a href=')
    if start_link == -1:
        return None, 0
    start_quote = page.find('"', start_link)
    end_quote = page.find('"', start_quote + 1)
    url = page[start_quote + 1:end_quote]
    return url, end_quote

def get_all_links(page):
    links = []
    while True:
        url, endpos = get_next_url(page)
        page = page[endpos:]
        if url:
            links.append(url)
        else:
            break
    return links

def add_to_index(index, url, keyword):
    if keyword in index:
        if url not in index[keyword]:
            index[keyword].append(url)
    index[keyword] = [url]

def add_page_to_index(index, url, content):
    words = content.split()
    for word in words:
        add_to_index(index, url, word)

def compute_ranks(graph):
    d = 0.8
    numloops = 20
    ranks = {}
    npages = len(graph)
    for page in graph:
        ranks[page] = 1.0 / npages
    for i in range(0, numloops):
        newranks = {}
        for page in graph:
            newrank = (1 - d) / npages
            for node in graph:
                if page in graph[node]:
                    newrank = newrank + d * (ranks[node] / len(graph[node]))
            newranks[page] = newrank
        ranks = newranks
    return ranks

def crawl_web(seed):
    tocrawl = [seed]
    crawled = []
    index = {}
    graph = {}
    global max_page
    while tocrawl:
        page = tocrawl.pop()
        if page not in crawled:
            max_page -= 1
            if max_page <= 0:
                break
            c = get_page(page)
            graph[page] = get_all_links(c)
            c = rm_tag(c)
            add_page_to_index(index, page, c)
            union(tocrawl, graph[page])
            crawled.append(page)
    return crawled, index, graph

def Look_up(index, keyword):
    if keyword in index:
        return index[keyword]
    else:
        return None

def lucky_search(index, ranks, keyword):
    try:
        pages = Look_up(index, keyword)
        if pages:
            bestpage = pages[0]
            for candidate in pages:
                if ranks[candidate] > ranks[bestpage]:
                    bestpage = candidate
            return (bestpage, ranks[bestpage], pages)
    except:
        print "Keyword does not exist"

seed_page = "http://www.python.org"
search_term = "Take"

try:
    print "Enter the Max Limit to Search :"
    max_limit = int(raw_input())
except:
    max_page = 10
max_page = max_limit

print '\nCrawling using ' + seed_page + " as seed_page and search_term: " + search_term
crawled, index, graph = crawl_web(seed_page)
ranks = compute_ranks(graph)

def print_profile(index, graph):
    print "*****************"
    print "Length of index", len(index)
    print "Length of graph", len(graph)
    i = 0
    for e in index:
        i = i + 1
        print i, ":", e
        if i > 20:
            break
    print "*****************"

def print_profile_top(index, ranks):
    max1 = 0
    for e in index:
        link_count = len(index[e])
        if link_count > max1:
            max1 = link_count
            print e, link_count, lucky_search(index, ranks, e)[1]
    print "*********************"

print_profile(index, graph)
print_profile_top(index, ranks)
print "result for :", search_term, ": ", lucky_search(index, ranks, search_term)
</code></pre>

The output is:

<pre><code>Enter the Max Limit to Search :
10

Crawling using http://www.python.org as seed_page and search_term: Take
*****************
Length of index 1281
Length of graph 9
1 : Canada
2 : limited
3 : all
4 : here"-->.
5 : unclear,
6 : CGI,
7 : 08:00
8 : enabled:
9 : results
10 : href=""
11 : :/
12 : subtle
13 : Take
14 : Buildbot,
15 : pyBiblio,
16 : CD”,
17 : href="/search-pymodules.xml"/>
18 : nothing
19 : Foundation
20 : pyArkansas
21 : depend
*****************
Canada 1 0.0222222222222
*********************
result for : Take : ('http://www.timparkin.co.uk/2012/08/why-you-cant-make-digital-look-like-velvia-50/', 0.022821308980213083, ['http://www.timparkin.co.uk/2012/08/why-you-cant-make-digital-look-like-velvia-50/'])
>>>
</code></pre>

Please run both and see the difference.
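One further difference between the two versions worth noting (an editorial observation, not part of the original post): Pool.map() returns one result per input chunk, so in the parallel version links is a list of per-chunk results rather than a flat list of URLs, and the for e in links loop appends lists, not pages, to tocrawl. The workers also run in separate processes, so assignments to the global graph inside get_all_links() are not visible in the parent process. A sketch of flattening the results, as a drop-in for the for e in links loop in the main block, assuming get_all_links() is changed to return a flat list of URLs per chunk as the serial version does:

<pre><code>links = pool.map(get_all_links, partitioned_text)  # e.g. [['a.html'], ['b.html', 'c.html'], []]
# Pool.map returns one result per chunk; flatten before queueing.
for chunk_links in links:
    for url in chunk_links:
        if url not in tocrawl:
            tocrawl.append(url)
</code></pre>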