HEX
Server: Apache
System: Linux zacp120.webway.host 4.18.0-553.50.1.lve.el8.x86_64 #1 SMP Thu Apr 17 19:10:24 UTC 2025 x86_64
User: govancoz (1003)
PHP: 8.3.26
Disabled: exec,system,passthru,shell_exec,proc_close,proc_open,dl,popen,show_source,posix_kill,posix_mkfifo,posix_getpwuid,posix_setpgid,posix_setsid,posix_setuid,posix_setgid,posix_seteuid,posix_setegid,posix_uname
Upload Files
File: //usr/local/lib/python3.10/test/__pycache__/test_robotparser.cpython-310.opt-1.pyc
o

�i�+�@s(ddlZddlZddlZddlZddlZddlmZddlm	Z	ddlm
Z
ddlmZm
Z
Gdd�d�ZGdd	�d	eej�ZGd
d�deej�ZGdd
�d
eej�ZGdd�deej�ZGdd�de�ZGdd�deej�ZGdd�deej�ZGdd�de�ZGdd�deej�ZGdd�deej�ZGdd�deej�ZGdd�deej�ZGd d!�d!e�ZGd"d#�d#eej�ZGd$d%�d%eej�ZGd&d'�d'eej�ZGd(d)�d)eej�Z Gd*d+�d+eej�Z!Gd,d-�d-eej�Z"Gd.d/�d/e�Z#Gd0d1�d1ej�Z$Gd2d3�d3ej�Z%e&d4k�re�'�dSdS)5�N)�support)�
socket_helper)�threading_helper)�BaseHTTPRequestHandler�
HTTPServerc@sHeZdZdZdZgZgZdZdd�Zdd�Z	dd	�Z
d
d�Zdd
�ZdS)�
BaseRobotTest�Ztest_robotparserNcCs,t�|j���}tj��|_|j�|�dS�N)	�io�StringIO�
robots_txt�	readlines�urllib�robotparser�RobotFileParser�parser�parse)�self�lines�r�2/usr/local/lib/python3.10/test/test_robotparser.py�setUpszBaseRobotTest.setUpcCs$t|t�r
|\}}||fS|j|fSr	)�
isinstance�tuple�agent�r�urlrrrr�get_agent_and_urls

zBaseRobotTest.get_agent_and_urlc	C�`|jD]*}|�|�\}}|j||d��|�|j�||��Wd�n1s(wYqdS�N)rr)�goodr�subTest�
assertTruer�	can_fetchrrrr�test_good_urls�
���zBaseRobotTest.test_good_urlsc	Crr)�badrr!�assertFalserr#rrrr�
test_bad_urls$r%zBaseRobotTest.test_bad_urlscCs|�|j��|j�dSr	)�assertEqualr�	site_maps�rrrr�test_site_maps*�zBaseRobotTest.test_site_maps)
�__name__�
__module__�__qualname__rrr r&r*rrr$r(r,rrrrrsrc@s eZdZdZddgZgd�ZdS)�UserAgentWildcardTestz�User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
    �/�
/test.html)�/cyberworld/map/index.htmlz/tmp/xxx�	/foo.htmlN�r.r/r0rr r&rrrrr1.sr1c@seZdZdZgd�ZdgZdS)�CrawlDelayAndCustomAgentTestz�# robots.txt for http://www.example.com/

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:
    )r2r3)Zcybermapperr4r4Nr6rrrrr79s
r7c@s&eZdZdZddgZdgZddgZdS)�SitemapTesta# robots.txt for http://www.example.com/

User-agent: *
Sitemap: http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml
Sitemap: http://www.google.com/hostednews/sitemap_index.xml
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

    r2r3r4z7http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xmlz2http://www.google.com/hostednews/sitemap_index.xmlN)r.r/r0rr r&r*rrrrr8Js
�r8c@seZdZdZgZgd�ZdS)�RejectAllRobotsTestz(# go away
User-agent: *
Disallow: /
    )r4r2�/tmp/Nr6rrrrr9[sr9c@seZdZdZdZdd�ZdS)�BaseRequestRateTestNc	Cs�|j}|j|jD]U}|�|�\}}|j||d��=|�|�|�|j�|�|�}|�||j�|jdurO|�|t	j
j�|�|j|jj�|�|j
|jj
�Wd�n1sYwYq	dSr)rr r&rr!r)�crawl_delay�request_rateZassertIsInstancerr�RequestRateZrequestsZseconds)rrrrZparsed_request_raterrr�test_request_rateis0

�������z%BaseRequestRateTest.test_request_rate)r.r/r0r=r<r?rrrrr;esr;c@seZdZdZdgZdS)�
EmptyFileTestrz/fooN)r.r/r0rr rrrrr@�s
r@c@s4eZdZdZdZej�dd�ZdZ	dgZ
gd�ZdS)	�CrawlDelayAndRequestRateTestz�User-agent: figtree
Crawl-delay: 3
Request-rate: 9/30
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
    �figtree�	��)rBr5)�/tmpz	/tmp.html�/tmp/a.html�/a%3cd.html�/a%3Cd.htmlz/a%2fb.htmlz/~joe/index.htmlN)r.r/r0rrrrr>r=r<r r&rrrrrA�s	rAc@�eZdZdZdS)�DifferentAgentTestzFigTree Robot libwww-perl/5.04N�r.r/r0rrrrrrK��rKc@s"eZdZdZdgZgd�ZdZdS)�InvalidRequestRateTestz�User-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
Crawl-delay: 3
Request-rate: 9/banana
    rF)r:rGrHrIz	/a/b.htmlz/%7Ejoe/index.htmlrEN)r.r/r0rr r&r<rrrrrN�s
	rNc@seZdZdZdgZgZdS)�InvalidCrawlDelayTestz2User-Agent: *
Disallow: /.
Crawl-delay: pears
    r5Nr6rrrrrO�srOc@� eZdZdZdZdgZdgZdS)�AnotherInvalidRequestRateTestzeUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
Request-rate: whale/banana
    �	Googlebot�/folder1/myfile.html�/folder1/anotherfile.htmlN�r.r/r0rrr r&rrrrrQ�s

rQc@seZdZdZdZdgZdS)�UserAgentOrderingTestzMUser-agent: Googlebot
Disallow: /

User-agent: Googlebot-Mobile
Allow: /
    rRz/something.jpgN)r.r/r0rrr&rrrrrV�s
rVc@rJ)�UserAgentGoogleMobileTestzGooglebot-MobileNrLrrrrrW�rMrWc@rP)�GoogleURLOrderingTestzJUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
    Z	googlebotrSrTNrUrrrrrX�s

rXc@�eZdZdZdgZdgZdS)�DisallowQueryStringTestz2User-agent: *
Disallow: /some/path?name=value
    �
/some/pathz/some/path?name=valueNr6rrrrrZ�s
rZc@rY)�UseFirstUserAgentWildcardTestzNUser-agent: *
Disallow: /some/path

User-agent: *
Disallow: /another/path
    z
/another/pathr[Nr6rrrrr\�s
r\c@rY)�EmptyQueryStringTestz>User-agent: *
Allow: /some/path?
Disallow: /another/path?
    z/some/path?z/another/path?Nr6rrrrr]�s
r]c@s0eZdZdZej�dd�ZdZddgZ	dgZ
dS)	�DefaultEntryTestzOUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/
    rE��r2r3r4N)r.r/r0rrrr>r=r<r r&rrrrr^s
r^c@seZdZdZdZdd�ZdS)�StringFormattingTestz�User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow: /some/path
    zxUser-agent: cybermapper
Disallow: /some/path

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/cCs|�t|j�|j�dSr	)r)�strr�expected_outputr+rrr�test_string_formatting*r-z+StringFormattingTest.test_string_formattingN)r.r/r0rrcrdrrrrras
rac@seZdZdd�Zdd�ZdS)�RobotHandlercCs|�dd�dS)Ni�zForbidden access)Z
send_errorr+rrr�do_GET0szRobotHandler.do_GETcGsdSr	r)r�format�argsrrr�log_message3szRobotHandler.log_messageN)r.r/r0rfrirrrrre.srec@s*eZdZdd�Zdd�Zejdd��ZdS)�PasswordProtectedSiteTestCasecCsP|�tjj�ttjdft�|_t	j
d|jjddid�|_d|j_
|j��dS)NrzHTTPServer servingZ
poll_intervalg{�G�z�?)�name�target�kwargsT)Z
addCleanuprZrequest�
urlcleanuprr�HOSTre�server�	threading�ThreadZ
serve_forever�t�daemon�startr+rrrr9s�z#PasswordProtectedSiteTestCase.setUpcCs"|j��|j��|j��dSr	)rp�shutdownrs�joinZserver_closer+rrr�tearDownIs

z&PasswordProtectedSiteTestCase.tearDowncCs\|jj}dtjdt|d�}|d}tj��}|�|�|�	�|�
|�d|��dS)Nzhttp://�:r`z/robots.txt�*)rpZserver_addressrrorbrrrZset_url�readr'r#)r�addrrZ
robots_urlrrrr�testPasswordProtectedSiteNs

z7PasswordProtectedSiteTestCase.testPasswordProtectedSiteN)r.r/r0rrxrZreap_threadsr}rrrrrj7s
rjc@sFeZdZdZd�e�Zedd��Zdd�Zdd�Z	d	d
�Z
dd�Zd
S)�NetworkTestCasezhttp://www.pythontest.net/z{}elsewhere/robots.txtcCsVt�d�t�|j��tj�|j�|_	|j	�
�Wd�dS1s$wYdS)NZnetwork)rZrequiresrZtransient_internet�base_urlrrrrrr{)�clsrrr�
setUpClass^s

"�zNetworkTestCase.setUpClasscCs&d�|j|tj�|�dsd�Sd�S)Nz{}{}{}r`r2r)rgr�os�path�splitext)rr�rrrres
��zNetworkTestCase.urlcCsV|�|jj�|�|jj�|�|j��d�|�|j�d��|�|j�d��dS)Nrrz)r'r�disallow_all�	allow_allZ
assertGreater�mtimer<r=r+rrr�
test_basicjs
zNetworkTestCase.test_basiccCs�|�|j�d|�d���|�|j�d|j��|�|j�d|�d���|�|j�d|�d���|�|j�d|�d���|�|j�d|j��dS)NrzZ	elsewhereZNutchZbrianZwebstats)r"rr#rr'rr+rrr�test_can_fetchqszNetworkTestCase.test_can_fetchcCsftj�|�d��}|��|�|j�|�|j�|�	|�
�d�|�|�d��|�|�
d��dS)Nzi-robot.txtrrz)rrrrr{r"r�r'r�r)r�ZassertIsNoner<r=)rrrrr�
test_read_404yszNetworkTestCase.test_read_404N)r.r/r0rrgr�classmethodr�rr�r�r�rrrrr~Ys

r~�__main__)(r
r�rqZunittestZurllib.robotparserr�testrZtest.supportrrZhttp.serverrrrZTestCaser1r7r8r9r;r@rArKrNrOrQrVrWrXrZr\r]r^rarerjr~r.�mainrrrr�<module>sF"





	"
)�