I am scraping JavaScript from a site and it returns the code below, but of course this will not show the Flash video or run the JavaScript, since I'm just using simple PHP DOM parsers to return the HTML. Is there a way to run this JavaScript so that it returns the embedded object it outputs?
<script type="text/javascript">
// Builds the three option objects handed to swfobject.embedSWF() and then
// embeds the player. `var` is kept on purpose: in a classic <script> these
// are global bindings, and other scripts on the page may read them.

// Attributes argument: carries the id "flashMovie".
var attributes = { id: "flashMovie" };

// Flashvars argument. Key order matches the original assignments. Both feed
// URLs are passed through escape() before being stored.
var flashvars = {
  startjs: "playerLoaded",
  activeColor: "83A7D2",
  themeColor: "FFFFFF",
  config: escape("http://example/0a1cee42025e9e49d25d.fid?key=c3e868caa037531d0d709e238d93013a&VID=189988&catID=1,26,43,50&rollover=1&startThumb=19&embed=&utm_source=&multiview=0&premium=1&country=&user=0&vip=0&heightHD=480p&cd=u&ref=browse"),
  // Alternative config URL that was commented out in the page source:
  //flashvars.config = escape("http://example.com/0a1cee42025e9e49d25d.fid?key=c3e868caa037531d0d709e238d93013a&VID=189988&catID=1,26,43,50&rollover=1&startThumb=19&premium=1&country=&user=0&vip=0&cd=u&ref=browse");
  config2: escape("http://www.example.com/player_feed_local.php?vid=189988&CHIDS=1,26,43,50&link=http%253A%252F%252Fwww.example.com%252Fjump%252FTesting-Video%252Fvideo189988%253Fref%253Dbrowse")
};

// Params argument. All values are strings, including the boolean-looking ones.
var params = {
  startjs: "playerLoaded",
  loop: "false",
  quality: "best",
  bgcolor: "#000000",
  allowfullscreen: "true",
  allowscriptaccess: "always",
  wmode: "opaque"
};

// Positional arguments are unchanged from the original call; "flashMovie" is
// the same id that is set on `attributes` above.
swfobject.embedSWF(
  "http://www.example.com/Player_v1.11.9.7.swf?v=1.0",
  "flashMovie",
  "100%",
  "500",
  "9",
  "expressInstall.swf",
  flashvars,
  params,
  attributes
);
</script>
-
2 You mean, return an image representing the object, as if it were on the page? You can't take something out of its page context reliably. You will need to render the whole page. – Brad, commented Nov 3, 2011 at 20:55 (2011-11-03 20:55:14 +00:00)
-
Which programming language are you using to parse the HTML? – Anderson Green, commented May 10, 2013 at 19:27 (2013-05-10 19:27:22 +00:00)
2 Answers 2
You can run JavaScript just like a browser does (with a real DOM and all of that) and extract the data using tools like PhantomJS or Crowbar.
1 Comment
Use a browser render engine like webkit to execute the JavaScript, and then you can extract the resulting html.
Here is some example code: http://webscraping.com/blog/Scraping-JavaScript-webpages-with-webkit/