Fixed a crash when encountering tables that do not contain links in their
columns
This commit is contained in:
parent
4742e417f7
commit
544e609d44
1 changed file with 13 additions and 9 deletions
|
|
@@ -153,17 +153,21 @@ class CrawlObject():
|
|||
if len(items) == 4:
|
||||
_, name, _, download = items
|
||||
|
||||
link = download.find("a").attrs["href"]
|
||||
name = name.find("a").text + ".mp4" # rough estimate that all files are mp4
|
||||
url = urljoin(self.url, link)
|
||||
|
||||
link_element = download.find("a")
|
||||
name_element = name.find("a")
|
||||
if link_element is not None and name_element is not None:
|
||||
link = link_element.attrs["href"]
|
||||
name = name_element.text + ".mp4" # rough estimate that all files are mp4
|
||||
url = urljoin(self.url, link)
|
||||
|
||||
path = self.path / name
|
||||
path = self.path / name
|
||||
|
||||
if url in self.discovered:
|
||||
logging.info(f"{ url } already discovered, skipping it")
|
||||
continue
|
||||
self.discovered.add(url)
|
||||
subitems.append(CrawlObject(path, url, self.session, discovered=self.discovered))
|
||||
if url in self.discovered:
|
||||
logging.info(f"{ url } already discovered, skipping it")
|
||||
continue
|
||||
self.discovered.add(url)
|
||||
subitems.append(CrawlObject(path, url, self.session, discovered=self.discovered))
|
||||
|
||||
# download things from exercise sections
|
||||
exercise_div = content.find("div", class_="ilExcOverview")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue