Fixed a crash when finding tables that do not contain links in the
columns.
This commit is contained in:
parent
4742e417f7
commit
544e609d44
1 changed file with 13 additions and 9 deletions
|
|
@ -153,17 +153,21 @@ class CrawlObject():
|
||||||
if len(items) == 4:
|
if len(items) == 4:
|
||||||
_, name, _, download = items
|
_, name, _, download = items
|
||||||
|
|
||||||
link = download.find("a").attrs["href"]
|
|
||||||
name = name.find("a").text + ".mp4" # rough estimate that all files are mp4
|
|
||||||
url = urljoin(self.url, link)
|
|
||||||
|
|
||||||
path = self.path / name
|
link_element = download.find("a")
|
||||||
|
name_element = name.find("a")
|
||||||
|
if link_element is not None and name_element is not None:
|
||||||
|
link = link_element.attrs["href"]
|
||||||
|
name = name_element.text + ".mp4" # rough estimate that all files are mp4
|
||||||
|
url = urljoin(self.url, link)
|
||||||
|
|
||||||
if url in self.discovered:
|
path = self.path / name
|
||||||
logging.info(f"{ url } already discovered, skipping it")
|
|
||||||
continue
|
if url in self.discovered:
|
||||||
self.discovered.add(url)
|
logging.info(f"{ url } already discovered, skipping it")
|
||||||
subitems.append(CrawlObject(path, url, self.session, discovered=self.discovered))
|
continue
|
||||||
|
self.discovered.add(url)
|
||||||
|
subitems.append(CrawlObject(path, url, self.session, discovered=self.discovered))
|
||||||
|
|
||||||
# download things from exercise sections
|
# download things from exercise sections
|
||||||
exercise_div = content.find("div", class_="ilExcOverview")
|
exercise_div = content.find("div", class_="ilExcOverview")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue