Friday, April 16, 2010

Extract Domain name from URL

The following query extracts domain name from the URL

Example:


URL :http://www.shopzilla.com/sharp-lc46e77u-46-in-1080p-lcd-hdtv/1029258582/compare

Domain : www.shopzilla.com

-- Set Domain_name

update Computers_and_Software
set Domain_Name = a.domain
from
(select Product_URL , replace(substring(Product_URL , 0 , charindex('/' , replace(Product_URL , '//' , '||'))), 'http://' , '') as domain from Computers_and_Software )a
where Computers_and_Software.Product_URL = a.Product_URL



-- Step1
update Computers_and_Software
set Domain_Name = a.domain
from
(
select Product_URL , replace(substring(Product_URL , 0 , charindex('//' , replace(Product_URL , 'http://' , '|||||||'))), 'http://' , '') as domain from Computers_and_Software
where Domain_Name = ' '
) a
where Computers_and_Software.Product_URL = a.Product_URL


--Step2
update Computers_and_Software
set Domain_Name = a.domain
from
(
select Product_URL , replace(substring(Product_URL , 0 , charindex('?' , replace(Product_URL , 'http://' , '|||||||'))), 'http://' , '') as domain from Computers_and_Software
where Domain_Name = ' '
) a
where Computers_and_Software.Product_URL = a.Product_URL


select * from Computers_and_Software
where Domain_Name = ' '

--Step3
update Computers_and_Software
set Domain_Name = Product_URL
where Domain_Name = ' '

2 comments:

  1. This comment has been removed by the author.

    ReplyDelete
  2. Try this instead...

    ALTER FUNCTION dbo.spExtractDomainFromURL ( @strURL NVARCHAR(1000) )
    RETURNS NVARCHAR(100)
    --Posted at http://stackoverflow.com/a/20808097/391101
    AS
    BEGIN
    --Strip Protocol
    SELECT @strURL = SUBSTRING(@strURL, CHARINDEX('://', @strURL) + 3, 999)

    -- Strip www subdomain
    IF LEFT(@strURL, 4) = 'www.'
    SELECT @strURL = SUBSTRING(@strURL, 5, 999)

    -- Strip Path
    IF CHARINDEX('/', @strURL) > 0
    SELECT @strURL = LEFT(@strURL, CHARINDEX('/', @strURL) - 1)

    --Unless you iterate through a list of TLDs, you can't differentiate between subdomain.example.com and example.com.au
    --I decided against this because it's slower, and the TLD list requires maintenance

    RETURN @strURL
    END

    ReplyDelete