Genius batch lyrics scraper

Download and get help for different MediaMonkey Addons.

Moderators: Peke, Gurus

mhendu
Posts: 100
Joined: Thu Jan 12, 2006 11:18 am
Contact:

Genius batch lyrics scraper

Post by mhendu »

Cobbled this together from a few other scripts. Needs a little work to properly encode all characters for Genius, but in my testing works fairly well. If you just want a 'hands off' batch script to populate lyrics from Genius run this, then for anything it misses you can follow up with Lyricator for a more comprehensive search.

Code: Select all

Sub Genius
  Dim list, itm, i, artisttemp, titletemp, lyricstemp, lyrics
  Dim geniusurl

  ' Get list of selected tracks from MediaMonkey
  Set list = SDB.CurrentSongList
  Dim prog : Set prog = SDB.Progress
  prog.Value = 0
  prog.MaxValue = list.Count
  If list.Count=0 Then
    Exit Sub
  End If
    ' Process all selected tracks
  For i=0 To list.count-1
	prog.Text = "Find Genius lyrics: Processing track "&(i+1)&" of "&(list.Count)&"..."
	lyricstemp = list.Item(i).Lyrics
	Do
	  SDB.ProcessMessages
      Set itm = list.Item(i)
	  If Len(itm.Lyrics) > 0 Then Exit Do
	  lyrics = ""
	  artisttemp = LCase(URLEncode(StripAccents(itm.ArtistName)))
	  titletemp = LCase(URLEncode(StripAccents(itm.Title)))
	  If InStr(titletemp,";") > 0 Then titletemp = Replace(titletemp,";","-")
	  ProcessLyrics artisttemp,titletemp,lyrics
	  If Len(lyrics) = 0 And (Left(artisttemp,4) = "the " Or Left(artisttemp,2) = "a " Or Left(artisttemp,3) = "an ") Then
		artisttemp = RemoveDefinite(artisttemp)
		ProcessLyrics artisttemp,titletemp,lyrics
	  End If
	  If Len(lyrics) > 0 Then itm.Lyrics = lyrics
	Loop While False
	If list.Item(i).Lyrics <> lyricstemp Then
		Dim z : Set z = SDB.NewSongList
		Call z.Add(list.Item(i))
		Call z.UpdateAll()
	End If
	prog.Increase
	If prog.Terminate Then
	  Exit Sub
	End If
	For a = 1 to 10
		Call SDB.Tools.Sleep(100)
		SDB.ProcessMessages
	Next
	Call SDB.Tools.Sleep(1000)
  Next
End Sub

Sub ProcessLyrics(artisttemp,titletemp,lyrics)
  If InStr(titletemp,"(") > 0 Or InStr(titletemp,"[") > 0 Then
	Dim titlenopar : titlenopar = Replace(Replace(Replace(Replace(titletemp,"(",""),")",""),"[",""),"]","")
	GeniusSearch artisttemp,titlenopar,lyrics
	If Len(lyrics) > 0 Then
		Exit Sub
	Else
		Call SDB.Tools.Sleep(1000)
		Dim RegEx : Set RegEx = New RegExp
		RegEx.IgnoreCase = True
		RegEx.MultiLine = True
		RegEx.Global = True
		RegEx.Pattern = "\([\s\S]*?\)"
		titletemp = RegEx.Replace(titletemp, "")
		RegEx.Pattern = "\[[\s\S]*?\]"
		titletemp = RegEx.Replace(titletemp, "")
   	    For i = 0 to 1000
			If InStr(titletemp,"--") > 0 Then
				titletemp = Replace(titletemp,"--","-")
			Else
				Exit For
			End If
		Next
		If Right(titletemp,1) = "-" Then titletemp = Left(titletemp,Len(titletemp)-1)
		GeniusSearch artisttemp,titletemp,lyrics
	End If
  Else
	GeniusSearch artisttemp,titletemp,lyrics
  End If
End Sub

Function URLEncode(ByVal str)
 Dim strTemp, strChar
 Dim intPos, intASCII
 str = Replace(str,"&","and")
 str = Replace(str,"'","")
 str = Replace(str,".","")
 str = Replace(str,",","")
 str = Replace(str,"!","")
 str = Replace(str,"#","")
 str = Replace(str,":","-")
 str = Replace(str,"%","")
 str = Replace(str,"?","")
 str = Replace(str,"""","""")
 strTemp = ""
 strChar = ""
 For intPos = 1 To Len(str)
  intASCII = Asc(Mid(str, intPos, 1))
  If intASCII = 32 Then
   strTemp = strTemp & "-"
  ElseIf Mid(str,intPos,1) = "(" Or Mid(str,intPos,1) = ")" Or Mid(str,intPos,1) = "[" Or Mid(str,intPos,1) = "]" Or Mid(str,intPos,1) = ";" Then
   strTemp = strTemp & Mid(str,intPos,1)
  ElseIf ((intASCII < 123) And (intASCII > 96)) Then
   strTemp = strTemp & Chr(intASCII)
  ElseIf ((intASCII < 91) And (intASCII > 64)) Then
   strTemp = strTemp & Chr(intASCII)
  ElseIf ((intASCII < 58) And (intASCII > 47)) Then
   strTemp = strTemp & Chr(intASCII)
  ElseIf ((intASCII < 256) And (intASCII > 191)) Then
   strTemp = strTemp & Chr(intASCII)
  ElseIf ((intASCII < 149) And (intASCII > 144)) Then
   strTemp = strTemp & Chr(intASCII)
  ElseIf ((intASCII < 47) And (intASCII > 44)) Then
   strTemp = strTemp & Chr(intASCII)
  ElseIf intASCII = 133 Then
   strTemp = strTemp & Chr(intASCII)
  Else
   strChar = Trim(Hex(intASCII))
   If intASCII < 16 Then
    strTemp = strTemp & "%0" & strChar
   Else
    strTemp = strTemp & "%" & strChar
   End If
  End If
 Next
 For i = 0 to 1000
	If InStr(strTemp,"--") > 0 Then
		strTemp = Replace(strTemp,"--","-")
	Else
		Exit For
	End If
 Next
 URLEncode = strTemp
End Function

Function StripAccents(str)
	accent   = "àèìòùÀÈÌÒÙäëïöüÄËÏÖÜâêîôûÂÊÎÔÛáéíóúÁÉÍÓÚðÐýÝãñõÃÑÕšŠžŽçÇåÅøØÿ/"
	noaccent = "aeiouAEIOUaeiouAEIOUaeiouAEIOUaeiouAEIOUdDyYanoANOsSzZcCaAoOy-"
	currentChar = ""
	result = ""
	k = 0
	o = 0	
	FOR k = 1 TO len(str)
		currentChar = mid(str,k, 1)
		o = InStr(accent, currentChar)
		IF o > 0 THEN
			result = result & mid(noaccent,o,1)
		ELSE
			result = result & currentChar
		END IF
	NEXT
	StripAccents = result
End Function

Sub GeniusSearch(artisttemp,titletemp,lyrics)
	Dim artisttemp1, titletemp1, lyricstemp
	If InStr(artisttemp,";") > 0 Then
		For i = 0 to UBound(split(artisttemp,";-"))
			artisttemp1 = split(artisttemp,";-")(i)
			geniusurl = "https://genius.com/" & artisttemp1 & "-" & titletemp & "-lyrics"
			set xmlhttp = createobject ("msxml2.xmlhttp.3.0")
			xmlhttp.open "get", geniusurl, false
			xmlhttp.send
			lyricstemp = lyrics
			lyrics = EvalLyrics(xmlhttp,lyricstemp)
			If Len(lyrics) > 0 Then
				Exit For
			ElseIf i < UBound(split(artisttemp,"; ")) Then
				Call SDB.Tools.Sleep(1000)
			End If
		Next
	Else
		geniusurl = "https://genius.com/" & artisttemp & "-" & titletemp & "-lyrics"
		set xmlhttp = createobject ("msxml2.xmlhttp.3.0")
		xmlhttp.open "get", geniusurl, false
		xmlhttp.send
		lyricstemp = lyrics
		lyrics = EvalLyrics(xmlhttp,lyricstemp)
	End If
End Sub

Function EvalLyrics(xmlhttp,lyrics)
	If InStr(xmlhttp.responseText,"render_404-headline") > 0 Then
		EvalLyrics = ""
	ElseIf InStr(xmlhttp.responseText,"Lyrics for this song have yet to be released. Please check back once the song has been released.") > 0 Then
		EvalLyrics = ""
	ElseIf InStr(xmlhttp.responseText,"<!--sse-->") > 1 Then
		lyrics = split(xmlhttp.responseText,"<!--sse-->")(2)
		lyrics = split(lyrics,"<!--/sse-->")(0)
		lyrics = split(lyrics,"<p>")(1)
		lyrics = split(lyrics,"</p>")(0)
		If InStr(lyrics,"<") > 0 Then
            Dim RegEx : Set RegEx = New RegExp
            RegEx.IgnoreCase = True
			RegEx.MultiLine = True
			RegEx.Global = True
            RegEx.Pattern = "<[\s\S]*?>"
            lyrics = RegEx.Replace(lyrics, "")
		End If
	End If
	If Len(lyrics) > 0 Then
		EvalLyrics = lyrics
	Else
		EvalLyrics = ""
	End If
End Function

Function RemoveDefinite(str)
	If Len(str) > 2 And LCase(Left(CStr(str),2)) = "a " Then str = Right(CStr(str),Len(str)-2)
	If Len(str) > 3 And LCase(Left(CStr(str),3)) = "an " Then str = Right(CStr(str),Len(str)-3)
	If Len(str) > 4 And LCase(Left(CStr(str),4)) = "the " Then str = Right(CStr(str),Len(str)-4)
	RemoveDefinite = str
End Function