[Tutor] Scraping gov site: site looking for Flash player
Roy Hinkelman
royhink at gmail.com
Mon Apr 5 20:31:39 CEST 2010
Interesting.
I am using urllib2 to open some government pages, and they have some js
checking for Flash on my computer.
Is there a way to show them that I have flash? Or possibly another solution?
My code:
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent' : user_agent}
req = urllib2.Request(_URL, None, headers)
data = mechanize.urlopen(req)
_soup = B_S(data)
And what I get back from 'print _soup':
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head><title>Welcome to OurDocuments.gov</title>
<!-- Prevent caching in netscape. Very important because netscape tends to
mangle cached code -->
<meta http-equiv="expires" content="0" />
<meta http-equiv="Pragma" content="no-cache" />
<script language="JavaScript" type="text/javascript">
<!--
// moock fpi [f.lash p.layer i.nspector]
// version: 1.3.5
// written by colin moock
// code maintained at:
http://www.moock.org/webdesign/flash/detection/moockfpi/
// terms of use posted at: http://www.moock.org/terms/
//
=============================================================================
// These are the user defined globals.
// Modify the following variables to customize the inspection behaviour.
var requiredVersion = 6; // Version the user needs to view site (max 6,
min 2)
var useRedirect = true; // Flag indicating whether or not to load a
separate
// page based on detection results. Set to true
to
// load a separate page.
// Only set next three vars if useRedirect is true...which it is...
var flashPage = "/doc.php?flash=true&doc=2"; // The location of the
flash movie page
var noFlashPage = "/doc.php?flash=false&doc=2"; // Page displayed if the
user doesn't have the
// plugin or we can't detect it.
var upgradePage = "/doc.php?flash=old&doc=2"; // Page displayed if we
detect an old plugin
//
=============================================================================
// *************
// Everything below this point is internal until after the BODY tag.
// Do not modify! Proceed to the BODY tag for further instructions.
// *************
// System globals
var flash2Installed = false; // boolean. true if flash 2 is installed
var flash3Installed = false; // boolean. true if flash 3 is installed
var flash4Installed = false; // boolean. true if flash 4 is installed
var flash5Installed = false; // boolean. true if flash 5 is installed
var flash6Installed = false; // boolean. true if flash 6 is installed
var maxVersion = 6; // highest version we can actually detect
var actualVersion = 0; // will become the version the user really
has
var hasRightVersion = false; // boolean. will become true if it's safe to
embed the flash movie in the page
var jsVersion = 1.0; // the version of javascript supported
// -->
</script>
<script language="JavaScript1.1" type="text/javascript">
<!--
// Check the browser...we're looking for ie/win
var isIE = (navigator.appVersion.indexOf("MSIE") != -1) ? true : false;
// true if we're on ie
var isWin = (navigator.appVersion.indexOf("Windows") != -1) ? true : false;
// true if we're on windows
// This is a js1.1 code block, so make note that js1.1 is supported.
jsVersion = 1.1;
// Write vbscript detection on ie win. IE on Windows doesn't support regular
// JavaScript plugins array detection.
if(isIE && isWin) {
document.write('<SCR' + 'IPT LANGUAGE=VBScript\> \n');
document.write('on error resume next \n');
document.write('flash2Installed =
(IsObject(CreateObject("ShockwaveFlash.ShockwaveFlash.2"))) \n');
document.write('flash3Installed =
(IsObject(CreateObject("ShockwaveFlash.ShockwaveFlash.3"))) \n');
document.write('flash4Installed =
(IsObject(CreateObject("ShockwaveFlash.ShockwaveFlash.4"))) \n');
document.write('flash5Installed =
(IsObject(CreateObject("ShockwaveFlash.ShockwaveFlash.5"))) \n');
document.write('flash6Installed =
(IsObject(CreateObject("ShockwaveFlash.ShockwaveFlash.6"))) \n');
document.write('</scr' + 'ipt\> \n'); // break up end tag so it doesn't
end our script
}
// -->
</script>
<script language="JavaScript" type="text/javascript">
<!--
// Next comes the standard javascript detection that uses the
// navigator.plugins array. We pack the detector into a function so it loads
// before we call it.
function detectFlash() {
// If navigator.plugins exists...
if (navigator.plugins) {
// ...then check for flash 2 or flash 3+.
if (navigator.plugins["Shockwave Flash 2.0"] ||
navigator.plugins["Shockwave Flash"]) {
// Some version of Flash was found. Time to figure out which.
// Set convenient references to flash 2 and the plugin description.
var isVersion2 = navigator.plugins["Shockwave Flash 2.0"] ? " 2.0" :
"";
var flashDescription = navigator.plugins["Shockwave Flash" +
isVersion2].description;
// DEBUGGING: uncomment next line to see the actual description.
// alert("Flash plugin description: " + flashDescription);
// A flash plugin-description looks like this: Shockwave Flash 4.0 r5
// We can get the major version by grabbing the character before the
period
// note that we don't bother with minor version detection here (since
we don't need to, and it is best accomplished from within an actual Flash
movie anyway).
var flashVersion =
parseInt(flashDescription.charAt(flashDescription.indexOf(".") - 1));
// wow this is old now. to accomodate flash players 10 (!!) and higher,
following code was added. d.cole. 3/2010
var flashVersion2 =
parseInt(flashDescription.charAt(flashDescription.indexOf(".") - 2));
// We found the version, now set appropriate version flags. Make sure
// to use >= on the highest version so we don't prevent future version
// users from entering the site.
flash2Installed = flashVersion == 2;
flash3Installed = flashVersion == 3;
flash4Installed = flashVersion == 4;
flash5Installed = flashVersion == 5;
flash6Installed = flashVersion >= 6;
// wow this is old now. to accomodate flash players 10 (!!) and higher,
following code was added. d.cole. 3/2010
flash6Installed = flashVersion2 >= 1;
}
}
// Loop through all versions we're checking, and
// set actualVersion to highest detected version.
for (var i = 2; i <= maxVersion; i++) {
if (eval("flash" + i + "Installed") == true) {
actualVersion = i;
}
}
if (!actualVersion) {
actualVersion = 0;
}
// If we're on webtv, the version supported is 2 (pre-summer2000,
// or 3, post-summer2000). Note that we don't bother sniffing varieties
// of webtv. You could if you were sadistic...
if(navigator.userAgent.indexOf("WebTV") != -1) actualVersion = 3;
// DEBUGGING: uncomment next line to display flash version
//alert("version detected: " + actualVersion);
// We're finished getting the version on all browsers that support
detection.
// Time to take the appropriate action.
// If the user has a new enough version...
if (actualVersion >= requiredVersion) {
// ...then we'll redirect them to the flash page, unless we've
// been told not to redirect.
if (useRedirect) {
// Need javascript1.1 to do location.replace
if(jsVersion > 1.0) {
// It's safe to use replace(). Good...we won't break the back
button.
window.location.replace(flashPage);
} else {
// JavaScript version is too old, so use .location to load the flash
page.
window.location = flashPage;
}
}
// If we got here, we didn't redirect. So we make a note that we should
// write out the object/embed tags later.
hasRightVersion = true;
} else {
// The user doesn't have a new enough version.
// If the redirection option is on, load the appropriate alternate page.
if (useRedirect) {
// Do the same .replace() call only if js1.1+ is available.
if(jsVersion > 1.0) {
window.location.replace((actualVersion >= 2) ? upgradePage :
noFlashPage);
} else {
window.location = (actualVersion >= 2) ? upgradePage : noFlashPage;
}
}
}
}
detectFlash(); // call our detector function now that it's safely loaded.
// -->
</script>
</head>
<body bgcolor="#FFFFFF" onload="detectFlash();">
<p>One moment please...</p>
<noscript>
<meta http-equiv="Refresh" content="4;URL=/doc.php?flash=false&doc=2" />
</noscript>
</body>
</html>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/tutor/attachments/20100405/f9e42403/attachment-0001.html>
More information about the Tutor
mailing list