[Tutor] Scraping gov site: site looking for Flash player

Roy Hinkelman royhink at gmail.com
Mon Apr 5 20:31:39 CEST 2010


Interesting.

I am using urllib2 to open some government pages, and they have some js
checking for Flash on my computer.

Is there a way to show them that I have flash? Or possibly another solution?

My code:
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent' : user_agent}
req = urllib2.Request(_URL, None, headers)
data = mechanize.urlopen(req)
_soup = B_S(data)

And what I get back from 'print _soup':
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head><title>Welcome to OurDocuments.gov</title>
<!-- Prevent caching in netscape. Very important because netscape tends to
mangle cached code -->
<meta http-equiv="expires" content="0" />
<meta http-equiv="Pragma" content="no-cache" />
<script language="JavaScript" type="text/javascript">

<!--

// moock fpi [f.lash p.layer i.nspector]

// version: 1.3.5

// written by colin moock

// code maintained at:
http://www.moock.org/webdesign/flash/detection/moockfpi/

// terms of use posted at: http://www.moock.org/terms/



//
=============================================================================

// These are the user defined globals.

// Modify the following variables to customize the inspection behaviour.



var requiredVersion = 6;   // Version the user needs to view site (max 6,
min 2)



var useRedirect = true;    // Flag indicating whether or not to load a
separate

                           // page based on detection results. Set to true
to

                           // load a separate page.



// Only set next three vars if useRedirect is true...which it is...

var flashPage = "/doc.php?flash=true&doc=2";     // The location of the
flash movie page

var noFlashPage = "/doc.php?flash=false&doc=2";  // Page displayed if the
user doesn't have the

                                      // plugin or we can't detect it.



var upgradePage = "/doc.php?flash=old&doc=2";  // Page displayed if we
detect an old plugin

//
=============================================================================



// *************

// Everything below this point is internal until after the BODY tag.

// Do not modify! Proceed to the BODY tag for further instructions.

// *************



// System globals

var flash2Installed = false;    // boolean. true if flash 2 is installed

var flash3Installed = false;    // boolean. true if flash 3 is installed

var flash4Installed = false;    // boolean. true if flash 4 is installed

var flash5Installed = false;    // boolean. true if flash 5 is installed

var flash6Installed = false;    // boolean. true if flash 6 is installed

var maxVersion = 6;             // highest version we can actually detect

var actualVersion = 0;          // will become the version the user really
has

var hasRightVersion = false;    // boolean. will become true if it's safe to
embed the flash movie in the page

var jsVersion = 1.0;            // the version of javascript supported



// -->

</script>
<script language="JavaScript1.1" type="text/javascript">

<!--



// Check the browser...we're looking for ie/win

var isIE = (navigator.appVersion.indexOf("MSIE") != -1) ? true : false;
// true if we're on ie

var isWin = (navigator.appVersion.indexOf("Windows") != -1) ? true : false;
// true if we're on windows



// This is a js1.1 code block, so make note that js1.1 is supported.

jsVersion = 1.1;



// Write vbscript detection on ie win. IE on Windows doesn't support regular

// JavaScript plugins array detection.

if(isIE && isWin) {

  document.write('<SCR' + 'IPT LANGUAGE=VBScript\> \n');

  document.write('on error resume next \n');

  document.write('flash2Installed =
(IsObject(CreateObject("ShockwaveFlash.ShockwaveFlash.2"))) \n');

  document.write('flash3Installed =
(IsObject(CreateObject("ShockwaveFlash.ShockwaveFlash.3"))) \n');

  document.write('flash4Installed =
(IsObject(CreateObject("ShockwaveFlash.ShockwaveFlash.4"))) \n');

  document.write('flash5Installed =
(IsObject(CreateObject("ShockwaveFlash.ShockwaveFlash.5"))) \n');

  document.write('flash6Installed =
(IsObject(CreateObject("ShockwaveFlash.ShockwaveFlash.6"))) \n');

  document.write('</scr' + 'ipt\> \n'); // break up end tag so it doesn't
end our script

}

// -->

</script>
<script language="JavaScript" type="text/javascript">

<!--

// Next comes the standard javascript detection that uses the

// navigator.plugins array. We pack the detector into a function so it loads

// before we call it.



function detectFlash() {

  // If navigator.plugins exists...

  if (navigator.plugins) {

    // ...then check for flash 2 or flash 3+.

    if (navigator.plugins["Shockwave Flash 2.0"] ||
navigator.plugins["Shockwave Flash"]) {



      // Some version of Flash was found. Time to figure out which.



      // Set convenient references to flash 2 and the plugin description.

      var isVersion2 = navigator.plugins["Shockwave Flash 2.0"] ? " 2.0" :
"";

      var flashDescription = navigator.plugins["Shockwave Flash" +
isVersion2].description;



      // DEBUGGING: uncomment next line to see the actual description.

      // alert("Flash plugin description: " + flashDescription);



      // A flash plugin-description looks like this: Shockwave Flash 4.0 r5

      // We can get the major version by grabbing the character before the
period

      // note that we don't bother with minor version detection here (since
we don't need to, and it is best accomplished from within an actual Flash
movie anyway).

      var flashVersion =
parseInt(flashDescription.charAt(flashDescription.indexOf(".") - 1));



// wow this is old now.  to accomodate flash players 10 (!!) and higher,
following code was added.  d.cole. 3/2010

      var flashVersion2 =
parseInt(flashDescription.charAt(flashDescription.indexOf(".") - 2));



    // We found the version, now set appropriate version flags. Make sure

      // to use >= on the highest version so we don't prevent future version

      // users from entering the site.

      flash2Installed = flashVersion == 2;

      flash3Installed = flashVersion == 3;

      flash4Installed = flashVersion == 4;

      flash5Installed = flashVersion == 5;

      flash6Installed = flashVersion >= 6;

// wow this is old now.  to accomodate flash players 10 (!!) and higher,
following code was added.  d.cole. 3/2010

      flash6Installed = flashVersion2 >= 1;

}

  }



  // Loop through all versions we're checking, and

  // set actualVersion to highest detected version.

  for (var i = 2; i <= maxVersion; i++) {

    if (eval("flash" + i + "Installed") == true) {

        actualVersion = i;

    }

  }



  if (!actualVersion) {

      actualVersion = 0;

  }



  // If we're on webtv, the version supported is 2 (pre-summer2000,

  // or 3, post-summer2000). Note that we don't bother sniffing varieties

  // of webtv. You could if you were sadistic...

  if(navigator.userAgent.indexOf("WebTV") != -1) actualVersion = 3;



  // DEBUGGING: uncomment next line to display flash version

  //alert("version detected: " + actualVersion);



  // We're finished getting the version on all browsers that support
detection.

  // Time to take the appropriate action.



  // If the user has a new enough version...

  if (actualVersion >= requiredVersion) {

    // ...then we'll redirect them to the flash page, unless we've

    // been told not to redirect.

    if (useRedirect) {

      // Need javascript1.1 to do location.replace

      if(jsVersion > 1.0) {

        // It's safe to use replace(). Good...we won't break the back
button.

        window.location.replace(flashPage);

      } else {

        // JavaScript version is too old, so use .location to load the flash
page.

        window.location = flashPage;

      }

    }



    // If we got here, we didn't redirect. So we make a note that we should

    // write out the object/embed tags later.

    hasRightVersion = true;

  } else {

    // The user doesn't have a new enough version.

    // If the redirection option is on, load the appropriate alternate page.

    if (useRedirect) {

      // Do the same .replace() call only if js1.1+ is available.

      if(jsVersion > 1.0) {

        window.location.replace((actualVersion >= 2) ? upgradePage :
noFlashPage);

      } else {

        window.location = (actualVersion >= 2) ? upgradePage : noFlashPage;

      }

    }

  }

}



detectFlash();  // call our detector function now that it's safely loaded.



// -->

</script>
</head>
<body bgcolor="#FFFFFF" onload="detectFlash();">
<p>One moment please...</p>
<noscript>
<meta http-equiv="Refresh" content="4;URL=/doc.php?flash=false&amp;doc=2" />
</noscript>
</body>
</html>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/tutor/attachments/20100405/f9e42403/attachment-0001.html>


More information about the Tutor mailing list